class: center, middle, inverse, title-slide # Data Processing in R ## With the {tidyverse} ### Michael Jones ### 2021-08-25 --- class: center, middle, inverse <style type="text/css"> .highlight-last-item > ul > li, .highlight-last-item > ol > li { opacity: 0.5; } .highlight-last-item > ul > li:last-of-type, .highlight-last-item > ol > li:last-of-type { opacity: 1; } /* custom.css */ .left-code { color: #777; width: 38%; height: 92%; float: left; } .right-plot { width: 60%; float: right; padding-left: 1%; } </style> # A quick note<br>on **Composition** --- class: center, middle # `\(f(g(x)) = (f \circ g)(x)\)` --- class: highlight-last-item, center - `f(g(x)) = g(x) %>% f()` -- - `f(g(x)) = x %>% g() %>% f()` -- - `f(x, y) = x %>% f(y)` -- - `f(x, y) = x |> f(y)` --- # Traditional Cake ```r sprinkle( sandwich( bake( mix( what = ingredients, in = "bowl" ), in = "oven", at = 180 ), between = "jam" ), with = "sugar" ) ``` --- # Saving your intermediate steps ```r batter <- mix(what = ingredients, in = "bowl") sponge <- bake(batter, in = "oven", at = 180) assembled_cake <- sandwich(sponge, between = "jam") decorated_cake <- sprinkle(assembled_cake, with = "sugar") ``` --- # Piped Cake ```r mix(what = ingredients, in = "bowl") %>% bake(in = "oven", at = 180) %>% sandwich(between = "jam") %>% sprinkle(with = "sugar") ``` --- class: center, middle, inverse # {tidyverse} --- ```r library(tidyverse) ``` ``` ## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ── ``` ``` ## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4 ## ✓ tibble 3.1.4 ✓ dplyr 1.0.7 ## ✓ tidyr 1.1.3 ✓ stringr 1.4.0 ## ✓ readr 2.0.1 ✓ forcats 0.5.1 ``` ``` ## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ── ## x dplyr::filter() masks stats::filter() ## x dplyr::lag() masks stats::lag() ``` --- # Other useful packages ```r library(readxl) library(janitor) ``` ``` ## ## Attaching package: 'janitor' ``` ``` ## The following objects are masked from 'package:stats': ## ## 
chisq.test, fisher.test ``` --- class: inverse, middle, center # Reading in<br>a CSV --- # Old Fashioned ```r read.csv("my_data.csv") ``` ``` ## id_column group fruit rating sampled ## 1 1 A apple 4 2021-07-05 ## 2 2 B apricot 3 2021-07-13 ## 3 3 A avocado 5 2021-07-09 ## 4 4 B banana 1 2021-07-26 ## 5 5 A bell pepper 4 2021-07-22 ## 6 6 B bilberry 1 2021-07-31 ## 7 7 A blackberry 5 2021-07-01 ## 8 8 B blackcurrant 1 2021-07-18 ``` --- # Tidyverse Way ```r read_csv("my_data.csv") ``` ``` ## Rows: 8 Columns: 5 ``` ``` ## ── Column specification ──────────────────────────────────────────────────────── ## Delimiter: "," ## chr (2): group, fruit ## dbl (2): id_column, rating ## date (1): sampled ``` ``` ## ## ℹ Use `spec()` to retrieve the full column specification for this data. ## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message. ``` ``` ## # A tibble: 8 × 5 ## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A apple 4 2021-07-05 ## 2 2 B apricot 3 2021-07-13 ## 3 3 A avocado 5 2021-07-09 ## 4 4 B banana 1 2021-07-26 ## 5 5 A bell pepper 4 2021-07-22 ## 6 6 B bilberry 1 2021-07-31 ## 7 7 A blackberry 5 2021-07-01 ## 8 8 B blackcurrant 1 2021-07-18 ``` --- # Tell R what you expect ```r ratings <- read_csv("my_data.csv", col_types = cols( id_column = col_double(), group = col_character(), fruit = col_character(), rating = col_double(), sampled = col_date(format = "") )) ``` ``` ## # A tibble: 8 × 5 ## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A apple 4 2021-07-05 ## 2 2 B apricot 3 2021-07-13 ## 3 3 A avocado 5 2021-07-09 ## 4 4 B banana 1 2021-07-26 ## 5 5 A bell pepper 4 2021-07-22 ## 6 6 B bilberry 1 2021-07-31 ## 7 7 A blackberry 5 2021-07-01 ## 8 8 B blackcurrant 1 2021-07-18 ``` --- # Tell R what you expect ```r ratings_bad <- read_csv("my_data.csv", col_types = cols( id_column = col_double(), group = col_character(), fruit = col_double(), rating = col_double(), 
sampled = col_date(format = "") )) ``` --- # Taking a quick look at the data ```r ratings ``` ``` ## # A tibble: 8 × 5 ## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A apple 4 2021-07-05 ## 2 2 B apricot 3 2021-07-13 ## 3 3 A avocado 5 2021-07-09 ## 4 4 B banana 1 2021-07-26 ## 5 5 A bell pepper 4 2021-07-22 ## 6 6 B bilberry 1 2021-07-31 ## 7 7 A blackberry 5 2021-07-01 ## 8 8 B blackcurrant 1 2021-07-18 ``` --- # Taking a quick look at the data ```r glimpse(ratings) ``` ``` ## Rows: 8 ## Columns: 5 ## $ id_column <dbl> 1, 2, 3, 4, 5, 6, 7, 8 ## $ group <chr> "A", "B", "A", "B", "A", "B", "A", "B" ## $ fruit <chr> "apple", "apricot", "avocado", "banana", "bell pepper", "bil… ## $ rating <dbl> 4, 3, 5, 1, 4, 1, 5, 1 ## $ sampled <date> 2021-07-05, 2021-07-13, 2021-07-09, 2021-07-26, 2021-07-22, … ``` --- class: center, inverse, middle # Key idea of<br>the tidyverse --- class: center, inverse, middle # Functions **accept** a data frame # Functions **return** a data frame --- # Selecting Columns ## <span class="hl kwd">select</span>(<dataframe>, <columns>) `\(\to\)` `data frame` --- count: false # Selecting Columns .panel1-selecting-rotate[ ```r ratings %>% * select(id_column) ``` ] .panel2-selecting-rotate[ ``` ## # A tibble: 8 × 1 ## id_column ## <dbl> ## 1 1 ## 2 2 ## 3 3 ## 4 4 ## 5 5 ## 6 6 ## 7 7 ## 8 8 ``` ] --- count: false # Selecting Columns .panel1-selecting-rotate[ ```r ratings %>% * select(id_column, group) ``` ] .panel2-selecting-rotate[ ``` ## # A tibble: 8 × 2 ## id_column group ## <dbl> <chr> ## 1 1 A ## 2 2 B ## 3 3 A ## 4 4 B ## 5 5 A ## 6 6 B ## 7 7 A ## 8 8 B ``` ] --- count: false # Selecting Columns .panel1-selecting-rotate[ ```r ratings %>% * select(id_column:rating) ``` ] .panel2-selecting-rotate[ ``` ## # A tibble: 8 × 4 ## id_column group fruit rating ## <dbl> <chr> <chr> <dbl> ## 1 1 A apple 4 ## 2 2 B apricot 3 ## 3 3 A avocado 5 ## 4 4 B banana 1 ## 5 5 A bell pepper 4 ## 6 6 B bilberry 1 ## 7 7 A 
blackberry 5 ## 8 8 B blackcurrant 1 ``` ] --- count: false # Selecting Columns .panel1-selecting-rotate[ ```r ratings %>% * select(-id_column) ``` ] .panel2-selecting-rotate[ ``` ## # A tibble: 8 × 4 ## group fruit rating sampled ## <chr> <chr> <dbl> <date> ## 1 A apple 4 2021-07-05 ## 2 B apricot 3 2021-07-13 ## 3 A avocado 5 2021-07-09 ## 4 B banana 1 2021-07-26 ## 5 A bell pepper 4 2021-07-22 ## 6 B bilberry 1 2021-07-31 ## 7 A blackberry 5 2021-07-01 ## 8 B blackcurrant 1 2021-07-18 ``` ] <style> .panel1-selecting-rotate { color: black; width: 38.6060606060606%; hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel2-selecting-rotate { color: black; width: 59.3939393939394%; hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel3-selecting-rotate { color: black; width: NA%; hight: 33%; float: left; padding-left: 1%; font-size: 80% } </style> --- class: center, middle - *Yes*: `ratings %>% select(id_column)` - **No**: `ratings %>% select("id_column")` --- # Filtering Rows ## <span class="hl kwd">filter</span>(<dataframe>, <predicates>) `\(\to\)` `data frame` --- count: false # Filtering Rows .panel1-filtering-rotate[ ```r ratings %>% * filter(group == "A") ``` ] .panel2-filtering-rotate[ ``` ## # A tibble: 4 × 5 ## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A apple 4 2021-07-05 ## 2 3 A avocado 5 2021-07-09 ## 3 5 A bell pepper 4 2021-07-22 ## 4 7 A blackberry 5 2021-07-01 ``` ] --- count: false # Filtering Rows .panel1-filtering-rotate[ ```r ratings %>% * filter(rating >= 3) ``` ] .panel2-filtering-rotate[ ``` ## # A tibble: 5 × 5 ## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A apple 4 2021-07-05 ## 2 2 B apricot 3 2021-07-13 ## 3 3 A avocado 5 2021-07-09 ## 4 5 A bell pepper 4 2021-07-22 ## 5 7 A blackberry 5 2021-07-01 ``` ] --- count: false # Filtering Rows .panel1-filtering-rotate[ ```r ratings %>% * filter(sampled > as.Date("2021-07-15")) ``` ] 
.panel2-filtering-rotate[ ``` ## # A tibble: 4 × 5 ## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 4 B banana 1 2021-07-26 ## 2 5 A bell pepper 4 2021-07-22 ## 3 6 B bilberry 1 2021-07-31 ## 4 8 B blackcurrant 1 2021-07-18 ``` ] <style> .panel1-filtering-rotate { color: black; width: 38.6060606060606%; hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel2-filtering-rotate { color: black; width: 59.3939393939394%; hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel3-filtering-rotate { color: black; width: NA%; hight: 33%; float: left; padding-left: 1%; font-size: 80% } </style> --- # Sorting Rows ## <span class="hl kwd">arrange</span>(<dataframe>, <columns>) `\(\to\)` `data frame` --- count: false # Sorting Rows .panel1-arranging-rotate[ ```r ratings %>% * arrange(rating) ``` ] .panel2-arranging-rotate[ ``` ## # A tibble: 8 × 5 ## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 4 B banana 1 2021-07-26 ## 2 6 B bilberry 1 2021-07-31 ## 3 8 B blackcurrant 1 2021-07-18 ## 4 2 B apricot 3 2021-07-13 ## 5 1 A apple 4 2021-07-05 ## 6 5 A bell pepper 4 2021-07-22 ## 7 3 A avocado 5 2021-07-09 ## 8 7 A blackberry 5 2021-07-01 ``` ] --- count: false # Sorting Rows .panel1-arranging-rotate[ ```r ratings %>% * arrange(desc(rating)) ``` ] .panel2-arranging-rotate[ ``` ## # A tibble: 8 × 5 ## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 3 A avocado 5 2021-07-09 ## 2 7 A blackberry 5 2021-07-01 ## 3 1 A apple 4 2021-07-05 ## 4 5 A bell pepper 4 2021-07-22 ## 5 2 B apricot 3 2021-07-13 ## 6 4 B banana 1 2021-07-26 ## 7 6 B bilberry 1 2021-07-31 ## 8 8 B blackcurrant 1 2021-07-18 ``` ] --- count: false # Sorting Rows .panel1-arranging-rotate[ ```r ratings %>% * arrange(group, sampled) ``` ] .panel2-arranging-rotate[ ``` ## # A tibble: 8 × 5 ## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 7 A blackberry 5 2021-07-01 ## 2 1 
A apple 4 2021-07-05 ## 3 3 A avocado 5 2021-07-09 ## 4 5 A bell pepper 4 2021-07-22 ## 5 2 B apricot 3 2021-07-13 ## 6 8 B blackcurrant 1 2021-07-18 ## 7 4 B banana 1 2021-07-26 ## 8 6 B bilberry 1 2021-07-31 ``` ] <style> .panel1-arranging-rotate { color: black; width: 38.6060606060606%; hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel2-arranging-rotate { color: black; width: 59.3939393939394%; hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel3-arranging-rotate { color: black; width: NA%; hight: 33%; float: left; padding-left: 1%; font-size: 80% } </style> --- # Changing Columns ## <span class="hl kwd">mutate</span>(<dataframe>, <new_column> = <<span class="hl kwa">function</span>>) `\(\to\)` `data frame` --- count: false # Changing Columns .panel1-mutating-rotate[ ```r ratings %>% * mutate(rating = rating * 20) ``` ] .panel2-mutating-rotate[ ``` ## # A tibble: 8 × 5 ## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A apple 80 2021-07-05 ## 2 2 B apricot 60 2021-07-13 ## 3 3 A avocado 100 2021-07-09 ## 4 4 B banana 20 2021-07-26 ## 5 5 A bell pepper 80 2021-07-22 ## 6 6 B bilberry 20 2021-07-31 ## 7 7 A blackberry 100 2021-07-01 ## 8 8 B blackcurrant 20 2021-07-18 ``` ] --- count: false # Changing Columns .panel1-mutating-rotate[ ```r ratings %>% * mutate(fruit = str_to_upper(fruit)) ``` ] .panel2-mutating-rotate[ ``` ## # A tibble: 8 × 5 ## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A APPLE 4 2021-07-05 ## 2 2 B APRICOT 3 2021-07-13 ## 3 3 A AVOCADO 5 2021-07-09 ## 4 4 B BANANA 1 2021-07-26 ## 5 5 A BELL PEPPER 4 2021-07-22 ## 6 6 B BILBERRY 1 2021-07-31 ## 7 7 A BLACKBERRY 5 2021-07-01 ## 8 8 B BLACKCURRANT 1 2021-07-18 ``` ] --- count: false # Changing Columns .panel1-mutating-rotate[ ```r ratings %>% * mutate(sampled = format(sampled, "%b %d"), rating = rating * 20) ``` ] .panel2-mutating-rotate[ ``` ## # A tibble: 8 × 5 ## id_column group fruit rating sampled 
## <dbl> <chr> <chr> <dbl> <chr> ## 1 1 A apple 80 Jul 05 ## 2 2 B apricot 60 Jul 13 ## 3 3 A avocado 100 Jul 09 ## 4 4 B banana 20 Jul 26 ## 5 5 A bell pepper 80 Jul 22 ## 6 6 B bilberry 20 Jul 31 ## 7 7 A blackberry 100 Jul 01 ## 8 8 B blackcurrant 20 Jul 18 ``` ] --- count: false # Changing Columns .panel1-mutating-rotate[ ```r ratings %>% * mutate(new_col = 1:8) ``` ] .panel2-mutating-rotate[ ``` ## # A tibble: 8 × 6 ## id_column group fruit rating sampled new_col ## <dbl> <chr> <chr> <dbl> <date> <int> ## 1 1 A apple 4 2021-07-05 1 ## 2 2 B apricot 3 2021-07-13 2 ## 3 3 A avocado 5 2021-07-09 3 ## 4 4 B banana 1 2021-07-26 4 ## 5 5 A bell pepper 4 2021-07-22 5 ## 6 6 B bilberry 1 2021-07-31 6 ## 7 7 A blackberry 5 2021-07-01 7 ## 8 8 B blackcurrant 1 2021-07-18 8 ``` ] <style> .panel1-mutating-rotate { color: black; width: 38.6060606060606%; hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel2-mutating-rotate { color: black; width: 59.3939393939394%; hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel3-mutating-rotate { color: black; width: NA%; hight: 33%; float: left; padding-left: 1%; font-size: 80% } </style> --- count: false # Chaining Functions Together .panel1-chaining-auto[ ```r *ratings ``` ] .panel2-chaining-auto[ ``` ## # A tibble: 8 × 5 ## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A apple 4 2021-07-05 ## 2 2 B apricot 3 2021-07-13 ## 3 3 A avocado 5 2021-07-09 ## 4 4 B banana 1 2021-07-26 ## 5 5 A bell pepper 4 2021-07-22 ## 6 6 B bilberry 1 2021-07-31 ## 7 7 A blackberry 5 2021-07-01 ## 8 8 B blackcurrant 1 2021-07-18 ``` ] --- count: false # Chaining Functions Together .panel1-chaining-auto[ ```r ratings %>% * filter(group == "A") ``` ] .panel2-chaining-auto[ ``` ## # A tibble: 4 × 5 ## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A apple 4 2021-07-05 ## 2 3 A avocado 5 2021-07-09 ## 3 5 A bell pepper 4 2021-07-22 ## 4 7 A blackberry 5 2021-07-01 
``` ] --- count: false # Chaining Functions Together .panel1-chaining-auto[ ```r ratings %>% filter(group == "A") %>% * select(fruit, sampled) ``` ] .panel2-chaining-auto[ ``` ## # A tibble: 4 × 2 ## fruit sampled ## <chr> <date> ## 1 apple 2021-07-05 ## 2 avocado 2021-07-09 ## 3 bell pepper 2021-07-22 ## 4 blackberry 2021-07-01 ``` ] --- count: false # Chaining Functions Together .panel1-chaining-auto[ ```r ratings %>% filter(group == "A") %>% select(fruit, sampled) %>% * mutate(month_name = format(sampled, "%b")) ``` ] .panel2-chaining-auto[ ``` ## # A tibble: 4 × 3 ## fruit sampled month_name ## <chr> <date> <chr> ## 1 apple 2021-07-05 Jul ## 2 avocado 2021-07-09 Jul ## 3 bell pepper 2021-07-22 Jul ## 4 blackberry 2021-07-01 Jul ``` ] --- count: false # Chaining Functions Together .panel1-chaining-auto[ ```r ratings %>% filter(group == "A") %>% select(fruit, sampled) %>% mutate(month_name = format(sampled, "%b")) %>% * arrange(desc(fruit)) ``` ] .panel2-chaining-auto[ ``` ## # A tibble: 4 × 3 ## fruit sampled month_name ## <chr> <date> <chr> ## 1 blackberry 2021-07-01 Jul ## 2 bell pepper 2021-07-22 Jul ## 3 avocado 2021-07-09 Jul ## 4 apple 2021-07-05 Jul ``` ] <style> .panel1-chaining-auto { color: black; width: 38.6060606060606%; hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel2-chaining-auto { color: black; width: 59.3939393939394%; hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel3-chaining-auto { color: black; width: NA%; hight: 33%; float: left; padding-left: 1%; font-size: 80% } </style> --- class: center, middle, inverse # Grouping and Summarising --- # Grouping and Summarising ## <span class="hl kwd">group_by</span>(<dataframe>, <columns>) `\(\to\)` `data frame` ## <span class="hl kwd">summarise</span>(<dataframe>, <new column> = <<span class="hl kwa">function</span>>) `\(\to\)` `data frame` --- # Grouping and Summarising ```r ratings %>% head(n = 2) ``` ``` ## # A tibble: 2 × 5 ## id_column group fruit rating sampled 
## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A apple 4 2021-07-05 ## 2 2 B apricot 3 2021-07-13 ``` ```r ratings %>% group_by(group) %>% head(n = 2) ``` ``` ## # A tibble: 2 × 5 ## # Groups: group [2] ## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A apple 4 2021-07-05 ## 2 2 B apricot 3 2021-07-13 ``` --- # Grouping and Summarising ```r ratings %>% summarise(mean = mean(rating)) ``` ``` ## # A tibble: 1 × 1 ## mean ## <dbl> ## 1 3 ``` ```r ratings %>% group_by(group) %>% summarise(mean = mean(rating)) ``` ``` ## # A tibble: 2 × 2 ## group mean ## <chr> <dbl> ## 1 A 4.5 ## 2 B 1.5 ``` --- # Counting ```r ratings %>% group_by(group) %>% summarise(count = n()) ``` ``` ## # A tibble: 2 × 2 ## group count ## <chr> <int> ## 1 A 4 ## 2 B 4 ``` ```r ratings %>% count(group, name = "count") ``` ``` ## # A tibble: 2 × 2 ## group count ## <chr> <int> ## 1 A 4 ## 2 B 4 ``` --- class: inverse, middle, center # tibbles within tibbles --- count: false # Nesting Data .panel1-nesting-auto[ ```r *ratings ``` ] .panel2-nesting-auto[ ``` ## # A tibble: 8 × 5 ## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A apple 4 2021-07-05 ## 2 2 B apricot 3 2021-07-13 ## 3 3 A avocado 5 2021-07-09 ## 4 4 B banana 1 2021-07-26 ## 5 5 A bell pepper 4 2021-07-22 ## 6 6 B bilberry 1 2021-07-31 ## 7 7 A blackberry 5 2021-07-01 ## 8 8 B blackcurrant 1 2021-07-18 ``` ] --- count: false # Nesting Data .panel1-nesting-auto[ ```r ratings %>% * group_by(group) ``` ] .panel2-nesting-auto[ ``` ## # A tibble: 8 × 5 ## # Groups: group [2] ## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A apple 4 2021-07-05 ## 2 2 B apricot 3 2021-07-13 ## 3 3 A avocado 5 2021-07-09 ## 4 4 B banana 1 2021-07-26 ## 5 5 A bell pepper 4 2021-07-22 ## 6 6 B bilberry 1 2021-07-31 ## 7 7 A blackberry 5 2021-07-01 ## 8 8 B blackcurrant 1 2021-07-18 ``` ] --- count: false # Nesting Data .panel1-nesting-auto[ ```r ratings %>% group_by(group) 
%>% * nest() ``` ] .panel2-nesting-auto[ ``` ## # A tibble: 2 × 2 ## # Groups: group [2] ## group data ## <chr> <list> ## 1 A <tibble [4 × 4]> ## 2 B <tibble [4 × 4]> ``` ] <style> .panel1-nesting-auto { color: black; width: 38.6060606060606%; hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel2-nesting-auto { color: black; width: 59.3939393939394%; hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel3-nesting-auto { color: black; width: NA%; hight: 33%; float: left; padding-left: 1%; font-size: 80% } </style> --- count: false # Nesting Data .panel1-nest_explanation-user[ ```r *(subset <- ratings %>% * filter(group == "A")) ``` ] .panel2-nest_explanation-user[ ``` ## # A tibble: 4 × 5 ## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A apple 4 2021-07-05 ## 2 3 A avocado 5 2021-07-09 ## 3 5 A bell pepper 4 2021-07-22 ## 4 7 A blackberry 5 2021-07-01 ``` ] --- count: false # Nesting Data .panel1-nest_explanation-user[ ```r (subset <- ratings %>% filter(group == "A")) *my_func <- function(df) { * f <- df %>% * filter(rating == max(rating)) %>% * slice(1) %>% * pull(fruit) * paste("The highest rated fruit is", f) *} ``` ] .panel2-nest_explanation-user[ ``` ## # A tibble: 4 × 5 ## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A apple 4 2021-07-05 ## 2 3 A avocado 5 2021-07-09 ## 3 5 A bell pepper 4 2021-07-22 ## 4 7 A blackberry 5 2021-07-01 ``` ] --- count: false # Nesting Data .panel1-nest_explanation-user[ ```r (subset <- ratings %>% filter(group == "A")) my_func <- function(df) { f <- df %>% filter(rating == max(rating)) %>% slice(1) %>% pull(fruit) paste("The highest rated fruit is", f) } *my_func(subset) ``` ] .panel2-nest_explanation-user[ ``` ## # A tibble: 4 × 5 ## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A apple 4 2021-07-05 ## 2 3 A avocado 5 2021-07-09 ## 3 5 A bell pepper 4 2021-07-22 ## 4 7 A blackberry 5 2021-07-01 ``` ``` ## 
[1] "The highest rated fruit is avocado" ``` ] <style> .panel1-nest_explanation-user { color: black; width: 38.6060606060606%; hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel2-nest_explanation-user { color: black; width: 59.3939393939394%; hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel3-nest_explanation-user { color: black; width: NA%; hight: 33%; float: left; padding-left: 1%; font-size: 80% } </style> --- count: false # Nesting Data .panel1-nest_explanation2-user[ ```r *my_func <- function(df) { * f <- df %>% * filter(rating == max(rating)) %>% * slice(1) %>% * pull(fruit) * paste("The highest rated fruit is", f) *} ``` ] .panel2-nest_explanation2-user[ ] --- count: false # Nesting Data .panel1-nest_explanation2-user[ ```r my_func <- function(df) { f <- df %>% filter(rating == max(rating)) %>% slice(1) %>% pull(fruit) paste("The highest rated fruit is", f) } *ratings %>% * group_by(group) %>% * nest() ``` ] .panel2-nest_explanation2-user[ ``` ## # A tibble: 2 × 2 ## # Groups: group [2] ## group data ## <chr> <list> ## 1 A <tibble [4 × 4]> ## 2 B <tibble [4 × 4]> ``` ] --- count: false # Nesting Data .panel1-nest_explanation2-user[ ```r my_func <- function(df) { f <- df %>% filter(rating == max(rating)) %>% slice(1) %>% pull(fruit) paste("The highest rated fruit is", f) } ratings %>% group_by(group) %>% nest() %>% * mutate(description = map(data, my_func)) ``` ] .panel2-nest_explanation2-user[ ``` ## # A tibble: 2 × 3 ## # Groups: group [2] ## group data description ## <chr> <list> <list> ## 1 A <tibble [4 × 4]> <chr [1]> ## 2 B <tibble [4 × 4]> <chr [1]> ``` ] --- count: false # Nesting Data .panel1-nest_explanation2-user[ ```r my_func <- function(df) { f <- df %>% filter(rating == max(rating)) %>% slice(1) %>% pull(fruit) paste("The highest rated fruit is", f) } ratings %>% group_by(group) %>% nest() %>% mutate(description = map(data, my_func)) %>% * unnest(description) ``` ] .panel2-nest_explanation2-user[ ``` ## # A 
tibble: 2 × 3 ## # Groups: group [2] ## group data description ## <chr> <list> <chr> ## 1 A <tibble [4 × 4]> The highest rated fruit is avocado ## 2 B <tibble [4 × 4]> The highest rated fruit is apricot ``` ] <style> .panel1-nest_explanation2-user { color: black; width: 38.6060606060606%; hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel2-nest_explanation2-user { color: black; width: 59.3939393939394%; hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel3-nest_explanation2-user { color: black; width: NA%; hight: 33%; float: left; padding-left: 1%; font-size: 80% } </style> --- count: false # More involved nesting .panel1-gapminder1-user[ ```r *library(gapminder) ``` ] .panel2-gapminder1-user[ ] --- count: false # More involved nesting .panel1-gapminder1-user[ ```r library(gapminder) *lm_fit <- function(df) { * lm(lifeExp ~ year, data = df) *} ``` ] .panel2-gapminder1-user[ ] --- count: false # More involved nesting .panel1-gapminder1-user[ ```r library(gapminder) lm_fit <- function(df) { lm(lifeExp ~ year, data = df) } *plot_graph <- function(df) { * ggplot(df, aes(x = year, y = lifeExp)) + * geom_point() + * geom_smooth(method = "lm") *} ``` ] .panel2-gapminder1-user[ ] --- count: false # More involved nesting .panel1-gapminder1-user[ ```r library(gapminder) lm_fit <- function(df) { lm(lifeExp ~ year, data = df) } plot_graph <- function(df) { ggplot(df, aes(x = year, y = lifeExp)) + geom_point() + geom_smooth(method = "lm") } *gapminder ``` ] .panel2-gapminder1-user[ ``` ## # A tibble: 1,704 × 6 ## country continent year lifeExp pop gdpPercap ## <fct> <fct> <int> <dbl> <int> <dbl> ## 1 Afghanistan Asia 1952 28.8 8425333 779. ## 2 Afghanistan Asia 1957 30.3 9240934 821. ## 3 Afghanistan Asia 1962 32.0 10267083 853. ## 4 Afghanistan Asia 1967 34.0 11537966 836. ## 5 Afghanistan Asia 1972 36.1 13079460 740. ## 6 Afghanistan Asia 1977 38.4 14880372 786. ## 7 Afghanistan Asia 1982 39.9 12881816 978. 
## 8 Afghanistan Asia 1987 40.8 13867957 852. ## 9 Afghanistan Asia 1992 41.7 16317921 649. ## 10 Afghanistan Asia 1997 41.8 22227415 635. ## # … with 1,694 more rows ``` ] --- count: false # More involved nesting .panel1-gapminder1-user[ ```r library(gapminder) lm_fit <- function(df) { lm(lifeExp ~ year, data = df) } plot_graph <- function(df) { ggplot(df, aes(x = year, y = lifeExp)) + geom_point() + geom_smooth(method = "lm") } gapminder %>% * group_by(country) %>% * nest() ``` ] .panel2-gapminder1-user[ ``` ## # A tibble: 142 × 2 ## # Groups: country [142] ## country data ## <fct> <list> ## 1 Afghanistan <tibble [12 × 5]> ## 2 Albania <tibble [12 × 5]> ## 3 Algeria <tibble [12 × 5]> ## 4 Angola <tibble [12 × 5]> ## 5 Argentina <tibble [12 × 5]> ## 6 Australia <tibble [12 × 5]> ## 7 Austria <tibble [12 × 5]> ## 8 Bahrain <tibble [12 × 5]> ## 9 Bangladesh <tibble [12 × 5]> ## 10 Belgium <tibble [12 × 5]> ## # … with 132 more rows ``` ] --- count: false # More involved nesting .panel1-gapminder1-user[ ```r library(gapminder) lm_fit <- function(df) { lm(lifeExp ~ year, data = df) } plot_graph <- function(df) { ggplot(df, aes(x = year, y = lifeExp)) + geom_point() + geom_smooth(method = "lm") } gapminder %>% group_by(country) %>% nest() %>% * mutate(model = map(data, lm_fit), * plot = map(data, plot_graph)) ``` ] .panel2-gapminder1-user[ ``` ## # A tibble: 142 × 4 ## # Groups: country [142] ## country data model plot ## <fct> <list> <list> <list> ## 1 Afghanistan <tibble [12 × 5]> <lm> <gg> ## 2 Albania <tibble [12 × 5]> <lm> <gg> ## 3 Algeria <tibble [12 × 5]> <lm> <gg> ## 4 Angola <tibble [12 × 5]> <lm> <gg> ## 5 Argentina <tibble [12 × 5]> <lm> <gg> ## 6 Australia <tibble [12 × 5]> <lm> <gg> ## 7 Austria <tibble [12 × 5]> <lm> <gg> ## 8 Bahrain <tibble [12 × 5]> <lm> <gg> ## 9 Bangladesh <tibble [12 × 5]> <lm> <gg> ## 10 Belgium <tibble [12 × 5]> <lm> <gg> ## # … with 132 more rows ``` ] <style> .panel1-gapminder1-user { color: black; width: 38.6060606060606%; 
hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel2-gapminder1-user { color: black; width: 59.3939393939394%; hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel3-gapminder1-user { color: black; width: NA%; hight: 33%; float: left; padding-left: 1%; font-size: 80% } </style> --- count: false # More involved nesting .panel1-gapminder_extract-rotate[ ```r gapminder_modified %>% filter(country == "United Kingdom") %>% * pull(model) ``` ] .panel2-gapminder_extract-rotate[ ``` ## [[1]] ## ## Call: ## lm(formula = lifeExp ~ year, data = df) ## ## Coefficients: ## (Intercept) year ## -294.197 0.186 ``` ] --- count: false # More involved nesting .panel1-gapminder_extract-rotate[ ```r gapminder_modified %>% filter(country == "United Kingdom") %>% * pull(plot) ``` ] .panel2-gapminder_extract-rotate[ ``` ## [[1]] ``` ``` ## `geom_smooth()` using formula 'y ~ x' ``` ![](data:image/png;base64,#data_manipulation_files/figure-html/gapminder_extract_rotate_02_output-1.png)<!-- --> ] <style> .panel1-gapminder_extract-rotate { color: black; width: 38.6060606060606%; hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel2-gapminder_extract-rotate { color: black; width: 59.3939393939394%; hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel3-gapminder_extract-rotate { color: black; width: NA%; hight: 33%; float: left; padding-left: 1%; font-size: 80% } </style> --- class: center, inverse, middle # Reshape your data --- count: false # Pivoting Longer .panel1-pivot_longer-user[ ```r *gapminder %>% * select(-c(continent)) ``` ] .panel2-pivot_longer-user[ ``` ## # A tibble: 1,704 × 5 ## country year lifeExp pop gdpPercap ## <fct> <int> <dbl> <int> <dbl> ## 1 Afghanistan 1952 28.8 8425333 779. ## 2 Afghanistan 1957 30.3 9240934 821. ## 3 Afghanistan 1962 32.0 10267083 853. ## 4 Afghanistan 1967 34.0 11537966 836. ## 5 Afghanistan 1972 36.1 13079460 740. ## 6 Afghanistan 1977 38.4 14880372 786. ## 7 Afghanistan 1982 39.9 12881816 978. 
## 8 Afghanistan 1987 40.8 13867957 852. ## 9 Afghanistan 1992 41.7 16317921 649. ## 10 Afghanistan 1997 41.8 22227415 635. ## # … with 1,694 more rows ``` ] --- count: false # Pivoting Longer .panel1-pivot_longer-user[ ```r gapminder %>% select(-c(continent)) %>% * pivot_longer(cols = -c(country, year), * names_to = "stat", * values_to = "value") ``` ] .panel2-pivot_longer-user[ ``` ## # A tibble: 5,112 × 4 ## country year stat value ## <fct> <int> <chr> <dbl> ## 1 Afghanistan 1952 lifeExp 28.8 ## 2 Afghanistan 1952 pop 8425333 ## 3 Afghanistan 1952 gdpPercap 779. ## 4 Afghanistan 1957 lifeExp 30.3 ## 5 Afghanistan 1957 pop 9240934 ## 6 Afghanistan 1957 gdpPercap 821. ## 7 Afghanistan 1962 lifeExp 32.0 ## 8 Afghanistan 1962 pop 10267083 ## 9 Afghanistan 1962 gdpPercap 853. ## 10 Afghanistan 1967 lifeExp 34.0 ## # … with 5,102 more rows ``` ] <style> .panel1-pivot_longer-user { color: black; width: 38.6060606060606%; hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel2-pivot_longer-user { color: black; width: 59.3939393939394%; hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel3-pivot_longer-user { color: black; width: NA%; hight: 33%; float: left; padding-left: 1%; font-size: 80% } </style> --- count: false # Pivoting Wider .panel1-pivot_wider-user[ ```r *gapminder %>% * select(country, year, pop) ``` ] .panel2-pivot_wider-user[ ``` ## # A tibble: 1,704 × 3 ## country year pop ## <fct> <int> <int> ## 1 Afghanistan 1952 8425333 ## 2 Afghanistan 1957 9240934 ## 3 Afghanistan 1962 10267083 ## 4 Afghanistan 1967 11537966 ## 5 Afghanistan 1972 13079460 ## 6 Afghanistan 1977 14880372 ## 7 Afghanistan 1982 12881816 ## 8 Afghanistan 1987 13867957 ## 9 Afghanistan 1992 16317921 ## 10 Afghanistan 1997 22227415 ## # … with 1,694 more rows ``` ] --- count: false # Pivoting Wider .panel1-pivot_wider-user[ ```r gapminder %>% select(country, year, pop) %>% * pivot_wider(names_from = "year", * values_from = "pop") ``` ] .panel2-pivot_wider-user[ ``` 
## # A tibble: 142 × 13 ## country `1952` `1957` `1962` `1967` `1972` `1977` `1982` `1987` `1992` `1997` ## <fct> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> ## 1 Afghan… 8.43e6 9.24e6 1.03e7 1.15e7 1.31e7 1.49e7 1.29e7 1.39e7 1.63e7 2.22e7 ## 2 Albania 1.28e6 1.48e6 1.73e6 1.98e6 2.26e6 2.51e6 2.78e6 3.08e6 3.33e6 3.43e6 ## 3 Algeria 9.28e6 1.03e7 1.10e7 1.28e7 1.48e7 1.72e7 2.00e7 2.33e7 2.63e7 2.91e7 ## 4 Angola 4.23e6 4.56e6 4.83e6 5.25e6 5.89e6 6.16e6 7.02e6 7.87e6 8.74e6 9.88e6 ## 5 Argent… 1.79e7 1.96e7 2.13e7 2.29e7 2.48e7 2.70e7 2.93e7 3.16e7 3.40e7 3.62e7 ## 6 Austra… 8.69e6 9.71e6 1.08e7 1.19e7 1.32e7 1.41e7 1.52e7 1.63e7 1.75e7 1.86e7 ## 7 Austria 6.93e6 6.97e6 7.13e6 7.38e6 7.54e6 7.57e6 7.57e6 7.58e6 7.91e6 8.07e6 ## 8 Bahrain 1.20e5 1.39e5 1.72e5 2.02e5 2.31e5 2.97e5 3.78e5 4.55e5 5.29e5 5.99e5 ## 9 Bangla… 4.69e7 5.14e7 5.68e7 6.28e7 7.08e7 8.04e7 9.31e7 1.04e8 1.14e8 1.23e8 ## 10 Belgium 8.73e6 8.99e6 9.22e6 9.56e6 9.71e6 9.82e6 9.86e6 9.87e6 1.00e7 1.02e7 ## # … with 132 more rows, and 2 more variables: 2002 <int>, 2007 <int> ``` ] <style> .panel1-pivot_wider-user { color: black; width: 38.6060606060606%; hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel2-pivot_wider-user { color: black; width: 59.3939393939394%; hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel3-pivot_wider-user { color: black; width: NA%; hight: 33%; float: left; padding-left: 1%; font-size: 80% } </style> --- class: inverse, middle, center # Combining Data --- # New data .pull-left[ ```r group_details ``` ``` ## # A tibble: 4 × 2 ## group time ## <chr> <chr> ## 1 A morning ## 2 B lunchtime ## 3 C afternoon ## 4 D evening ``` More groups than in `ratings` ] .pull-right[ ```r fruit_details ``` ``` ## # A tibble: 3 × 2 ## fruit price ## <chr> <dbl> ## 1 apple 2 ## 2 banana 4 ## 3 blackberry 6 ``` Fewer fruits than in `ratings` ] --- count: false # Left Join .panel1-leftjoin-user[ ```r *ratings ``` ] .panel2-leftjoin-user[ ``` ## # 
A tibble: 8 × 5 ## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A apple 4 2021-07-05 ## 2 2 B apricot 3 2021-07-13 ## 3 3 A avocado 5 2021-07-09 ## 4 4 B banana 1 2021-07-26 ## 5 5 A bell pepper 4 2021-07-22 ## 6 6 B bilberry 1 2021-07-31 ## 7 7 A blackberry 5 2021-07-01 ## 8 8 B blackcurrant 1 2021-07-18 ``` ] --- count: false # Left Join .panel1-leftjoin-user[ ```r ratings %>% * left_join(group_details, by = "group") ``` ] .panel2-leftjoin-user[ ``` ## # A tibble: 8 × 6 ## id_column group fruit rating sampled time ## <dbl> <chr> <chr> <dbl> <date> <chr> ## 1 1 A apple 4 2021-07-05 morning ## 2 2 B apricot 3 2021-07-13 lunchtime ## 3 3 A avocado 5 2021-07-09 morning ## 4 4 B banana 1 2021-07-26 lunchtime ## 5 5 A bell pepper 4 2021-07-22 morning ## 6 6 B bilberry 1 2021-07-31 lunchtime ## 7 7 A blackberry 5 2021-07-01 morning ## 8 8 B blackcurrant 1 2021-07-18 lunchtime ``` ] <style> .panel1-leftjoin-user { color: black; width: 38.6060606060606%; hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel2-leftjoin-user { color: black; width: 59.3939393939394%; hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel3-leftjoin-user { color: black; width: NA%; hight: 33%; float: left; padding-left: 1%; font-size: 80% } </style> --- count: false # Left Join .panel1-leftjoin2-user[ ```r *ratings ``` ] .panel2-leftjoin2-user[ ``` ## # A tibble: 8 × 5 ## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A apple 4 2021-07-05 ## 2 2 B apricot 3 2021-07-13 ## 3 3 A avocado 5 2021-07-09 ## 4 4 B banana 1 2021-07-26 ## 5 5 A bell pepper 4 2021-07-22 ## 6 6 B bilberry 1 2021-07-31 ## 7 7 A blackberry 5 2021-07-01 ## 8 8 B blackcurrant 1 2021-07-18 ``` ] --- count: false # Left Join .panel1-leftjoin2-user[ ```r ratings %>% * left_join(fruit_details, by = "fruit") ``` ] .panel2-leftjoin2-user[ ``` ## # A tibble: 8 × 6 ## id_column group fruit rating sampled price ## <dbl> <chr> <chr> <dbl> <date> 
<dbl> ## 1 1 A apple 4 2021-07-05 2 ## 2 2 B apricot 3 2021-07-13 NA ## 3 3 A avocado 5 2021-07-09 NA ## 4 4 B banana 1 2021-07-26 4 ## 5 5 A bell pepper 4 2021-07-22 NA ## 6 6 B bilberry 1 2021-07-31 NA ## 7 7 A blackberry 5 2021-07-01 6 ## 8 8 B blackcurrant 1 2021-07-18 NA ``` ] <style> .panel1-leftjoin2-user { color: black; width: 38.6060606060606%; hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel2-leftjoin2-user { color: black; width: 59.3939393939394%; hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel3-leftjoin2-user { color: black; width: NA%; hight: 33%; float: left; padding-left: 1%; font-size: 80% } </style> --- count: false # Inner Join .panel1-innerjoin-user[ ```r *ratings ``` ] .panel2-innerjoin-user[ ``` ## # A tibble: 8 × 5 ## id_column group fruit rating sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A apple 4 2021-07-05 ## 2 2 B apricot 3 2021-07-13 ## 3 3 A avocado 5 2021-07-09 ## 4 4 B banana 1 2021-07-26 ## 5 5 A bell pepper 4 2021-07-22 ## 6 6 B bilberry 1 2021-07-31 ## 7 7 A blackberry 5 2021-07-01 ## 8 8 B blackcurrant 1 2021-07-18 ``` ] --- count: false # Inner Join .panel1-innerjoin-user[ ```r ratings %>% * inner_join(fruit_details, by = "fruit") ``` ] .panel2-innerjoin-user[ ``` ## # A tibble: 3 × 6 ## id_column group fruit rating sampled price ## <dbl> <chr> <chr> <dbl> <date> <dbl> ## 1 1 A apple 4 2021-07-05 2 ## 2 4 B banana 1 2021-07-26 4 ## 3 7 A blackberry 5 2021-07-01 6 ``` ] <style> .panel1-innerjoin-user { color: black; width: 38.6060606060606%; hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel2-innerjoin-user { color: black; width: 59.3939393939394%; hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel3-innerjoin-user { color: black; width: NA%; hight: 33%; float: left; padding-left: 1%; font-size: 80% } </style> --- count: false # Full Join .panel1-fulljoin-user[ ```r *ratings ``` ] .panel2-fulljoin-user[ ``` ## # A tibble: 8 × 5 ## id_column group fruit rating 
sampled ## <dbl> <chr> <chr> <dbl> <date> ## 1 1 A apple 4 2021-07-05 ## 2 2 B apricot 3 2021-07-13 ## 3 3 A avocado 5 2021-07-09 ## 4 4 B banana 1 2021-07-26 ## 5 5 A bell pepper 4 2021-07-22 ## 6 6 B bilberry 1 2021-07-31 ## 7 7 A blackberry 5 2021-07-01 ## 8 8 B blackcurrant 1 2021-07-18 ``` ] --- count: false # Full Join .panel1-fulljoin-user[ ```r ratings %>% * full_join(group_details, by = "group") ``` ] .panel2-fulljoin-user[ ``` ## # A tibble: 10 × 6 ## id_column group fruit rating sampled time ## <dbl> <chr> <chr> <dbl> <date> <chr> ## 1 1 A apple 4 2021-07-05 morning ## 2 2 B apricot 3 2021-07-13 lunchtime ## 3 3 A avocado 5 2021-07-09 morning ## 4 4 B banana 1 2021-07-26 lunchtime ## 5 5 A bell pepper 4 2021-07-22 morning ## 6 6 B bilberry 1 2021-07-31 lunchtime ## 7 7 A blackberry 5 2021-07-01 morning ## 8 8 B blackcurrant 1 2021-07-18 lunchtime ## 9 NA C <NA> NA NA afternoon ## 10 NA D <NA> NA NA evening ``` ] <style> .panel1-fulljoin-user { color: black; width: 38.6060606060606%; hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel2-fulljoin-user { color: black; width: 59.3939393939394%; hight: 32%; float: left; padding-left: 1%; font-size: 80% } .panel3-fulljoin-user { color: black; width: NA%; hight: 33%; float: left; padding-left: 1%; font-size: 80% } </style> --- class: inverse, middle, center # Key Takeaways --- - The tidyverse is big - Composition is a key part - Grouping and nesting are powerful - Writing tidyverse-style functions is a little more involved