c(char = "hello", num = 1)
## char num ## "hello" "1"
list(char = "hello", num = 1, fun = mean)
## $char## [1] "hello"## ## $num## [1] 1## ## $fun## function (x, ...) ## UseMethod("mean")## <bytecode: 0x7fb922834d08>## <environment: namespace:base>
# Named list of simulated measurements; each element is a vector of normal
# draws with its own length (10, 5 and 20).
measurements <- list(
  blood_glucose = rnorm(n = 10, mean = 140, sd = 10),
  age = rnorm(n = 5, mean = 40, sd = 5),
  heartrate = rnorm(n = 20, mean = 80, sd = 15)
)
blood_glucose
from measurements
using these approaches. Are they different? What about measurements[["blood_glucose"]]?
measurements["blood_glucose"]
## $blood_glucose## [1] 127.9293 142.7743 150.8444 116.5430 144.2912 145.0606 134.2526 134.5337 134.3555 131.0996
measurements$blood_glucose
## [1] 127.9293 142.7743 150.8444 116.5430 144.2912 145.0606 134.2526 134.5337 134.3555 131.0996
measurements[["blood_glucose"]]
## [1] 127.9293 142.7743 150.8444 116.5430 144.2912 145.0606 134.2526 134.5337 134.3555 131.0996
# Build a named list, then coerce it to a data frame: each list element
# becomes a column.
x <- list(char = "hello", num = 1)
as.data.frame(x)
## char num## 1 hello 1
library(gapminder)
# `$` extracts the `pop` column as a plain vector; head() shows its first values.
head(gapminder$pop)
## [1] 8425333 9240934 10267083 11537966 13079460 14880372
gapminder[1:6, "pop"]
gapminder[1:6, "pop"]
## # A tibble: 6 x 1## pop## <int>## 1 8425333## 2 9240934## 3 10267083## 4 11537966## 5 13079460## 6 14880372
head(gapminder[["pop"]])
## [1] 8425333 9240934 10267083 11537966 13079460 14880372
sum(rnorm(10))
sum(rnorm(10))
## [1] -3.831574
sum(rnorm(10))
## [1] -3.831574
sum(list(x = rnorm(10), y = rnorm(10), z = rnorm(10)))
sum(rnorm(10))
## [1] -3.831574
sum(list(x = rnorm(10), y = rnorm(10), z = rnorm(10)))
## Error in sum(list(x = rnorm(10), y = rnorm(10), z = rnorm(10))): invalid 'type' (list) of argument
library(purrr)
# map() calls mean() on every element of the list and returns a list
# carrying the same names.
x_list <- list(x = rnorm(10), y = rnorm(10), z = rnorm(10))
map(x_list, mean)

library(purrr)
x_list <- list(x = rnorm(10), y = rnorm(10), z = rnorm(10))
map(x_list, mean)

library(purrr)
x_list <- list(x = rnorm(10), y = rnorm(10), z = rnorm(10))
map(x_list, mean)

library(purrr)
x_list <- list(x = rnorm(10), y = rnorm(10), z = rnorm(10))
map(x_list, mean)
## $x## [1] -0.6097971## ## $y## [1] -0.2788647## ## $z## [1] 0.6165922
# Sum each measurement vector by hand, one named entry per element.
list(
  sum_blood_glucose = sum(measurements[["blood_glucose"]]),
  sum_age = sum(measurements[["age"]]),
  sum_heartrate = sum(measurements[["heartrate"]])
)
map()
to create the same output.
map(measurements, sum)
## $blood_glucose## [1] 1361.684## ## $age## [1] 193.8606## ## $heartrate## [1] 1509.304
map()
with data frames
map()
with data frames
library(dplyr)
# Keep only the numeric columns, then take the standard deviation of each.
gapminder %>%
  select(where(is.numeric)) %>%
  map(sd)
map()
with data frames
library(dplyr)
# Keep only the numeric columns, then take the standard deviation of each.
gapminder %>%
  select(where(is.numeric)) %>%
  map(sd)
map()
with data frames
library(dplyr)
# Keep only the numeric columns, then take the standard deviation of each.
gapminder %>%
  select(where(is.numeric)) %>%
  map(sd)
map()
with data frames
library(dplyr)
# Keep only the numeric columns, then take the standard deviation of each.
gapminder %>%
  select(where(is.numeric)) %>%
  map(sd)
## $year## [1] 17.26533## ## $lifeExp## [1] 12.91711## ## $pop## [1] 106157897## ## $gdpPercap## [1] 9857.455
map()
and map using class()
. What are these results telling you?
head( map(diabetes, class), 3)
## $id## [1] "numeric"## ## $chol## [1] "numeric"## ## $stab.glu## [1] "numeric"
# Square, scale, then take the maximum -- written out by hand.
x <- x^2
x <- scale(x)
x <- max(x)

# The same three steps pasted again for y and z.
# NOTE(review): `y <- x^2` and `z <- scale(x)` reuse `x`; presumably these are
# deliberate copy-paste slips shown for the lesson -- confirm before changing.
x <- x^2
x <- scale(x)
x <- max(x)
y <- x^2
y <- scale(y)
y <- max(y)
z <- z^2
z <- scale(x)
z <- max(z)

x <- x^2
x <- scale(x)
x <- max(x)
y <- x^2
y <- scale(y)
y <- max(y)
z <- z^2
z <- scale(x)
z <- max(z)

# Here only the first step for x uses ^3; the y and z copies still use ^2.
x <- x^3
x <- scale(x)
x <- max(x)
y <- x^2
y <- scale(y)
y <- max(y)
z <- z^2
z <- scale(x)
z <- max(z)
# Cube the input, standardise it with scale(), and return the largest value.
.f <- function(x) {
  x <- x^3
  x <- scale(x)
  max(x)
}

# One definition, applied to each input in turn.
.f(x)
.f(y)
.f(z)
x
measurements
# Summarise a vector as a one-row tibble holding its mean and standard
# deviation.
mean_sd <- function(x) {
  x_mean <- mean(x)
  x_sd <- sd(x)
  tibble(mean = x_mean, sd = x_sd)
}

# Apply the summary to every element of `measurements`.
map(measurements, mean_sd)
## $blood_glucose## # A tibble: 1 x 2## mean sd## <dbl> <dbl>## 1 136. 9.96## ## $age## # A tibble: 1 x 2## mean sd## <dbl> <dbl>## 1 38.8 3.91## ## $heartrate## # A tibble: 1 x 2## mean sd## <dbl> <dbl>## 1 75.5 13.8
map()
map()
map(gapminder, ~length(unique(.x)))
map(gapminder, ~length(unique(.x)))
## $country## [1] 142## ## $continent## [1] 5## ## $year## [1] 12## ## $lifeExp## [1] 1626## ## $pop## [1] 1704## ## $gdpPercap## [1] 1704
map | returns |
---|---|
map() | list |
map_chr() | character vector |
map_dbl() | double vector (numeric) |
map_int() | integer vector |
map_lgl() | logical vector |
map_dfc() | data frame (by column) |
map_dfr() | data frame (by row) |
map_int(gapminder, ~length(unique(.x)))
map_int(gapminder, ~length(unique(.x)))
## country continent year lifeExp pop gdpPercap ## 142 5 12 1626 1704 1704
map_chr(diabetes, class)
## id chol stab.glu hdl ratio glyhb location age gender height weight frame bp.1s bp.1d bp.2s ## "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" "character" "numeric" "character" "numeric" "numeric" "character" "numeric" "numeric" "numeric" ## bp.2d waist hip time.ppn ## "numeric" "numeric" "numeric" "numeric"
diabetes
for any missing data.
is.na()
and any()
any()
? Why?
map_lgl(diabetes, ~any(is.na(.x)))
## id chol stab.glu hdl ratio glyhb location age gender height weight frame bp.1s bp.1d bp.2s bp.2d waist hip time.ppn ## FALSE TRUE FALSE TRUE TRUE TRUE FALSE FALSE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
map_int(diabetes, ~sum(is.na(.x)))
## id chol stab.glu hdl ratio glyhb location age gender height weight frame bp.1s bp.1d bp.2s bp.2d waist hip time.ppn ## 0 1 0 1 1 13 0 0 0 5 1 12 5 5 262 262 2 2 3
diabetes
into a list split by location
using the split()
function. Check its length.model_lm
function to model chol
(the outcome) with ratio
and pass the .data
argument to lm()
model_lm
to diabetes_list
so that it returns a data frame (by row).
# Split the data by location and check how many pieces there are.
diabetes_list <- split(diabetes, diabetes$location)
length(diabetes_list)

# Fit chol ~ ratio on one data subset and return the model summary from
# broom::glance().
model_lm <- function(.data) {
  mdl <- lm(chol ~ ratio, data = .data)
  # get model statistics
  broom::glance(mdl)
}

# NOTE(review): map() is kept because the printed output that follows is a
# list; map_dfr() is the variant that returns a data frame (by row).
map(diabetes_list, model_lm)
## [1] 2
## $Buckingham## # A tibble: 1 x 12## r.squared adj.r.squared sigma statistic p.value df## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>## 1 0.252 0.248 38.8 66.4 4.11e-14 1## # … with 6 more variables: logLik <dbl>, AIC <dbl>,## # BIC <dbl>, deviance <dbl>, df.residual <int>,## # nobs <int>## ## $Louisa## # A tibble: 1 x 12## r.squared adj.r.squared sigma statistic p.value df## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>## 1 0.204 0.201 39.4 51.7 1.26e-11 1## # … with 6 more variables: logLik <dbl>, AIC <dbl>,## # BIC <dbl>, deviance <dbl>, df.residual <int>,## # nobs <int>
means <- c(-3, 4, 2, 2.3)
sds <- c(.3, 4, 2, 1)
# Because n = 1 is passed by name, each (means, sds) pair fills rnorm()'s
# `mean` and `sd` arguments, giving one draw per pair.
map2_dbl(means, sds, rnorm, n = 1)

means <- c(-3, 4, 2, 2.3)
sds <- c(.3, 4, 2, 1)
map2_dbl(means, sds, rnorm, n = 1)

means <- c(-3, 4, 2, 2.3)
sds <- c(.3, 4, 2, 1)
map2_dbl(means, sds, rnorm, n = 1)
## [1] -2.997932 2.178125 1.266952 2.948287
map()
. For the first argument, pass gapminder_countries
. For the second, use the ~.f()
notation to write a model with lm()
. Use lifeExp
on the left hand side of the formula and year
on the right hand side. Pass .x
to the data
argument.
map2()
to take the models list and the data set list and map them to predict()
. Since we're not adding new arguments, you don't need to use ~.f()
.
# One data frame per country.
gapminder_countries <- split(gapminder, gapminder$country)
# Fit lifeExp ~ year separately on each country's data.
models <- map(gapminder_countries, ~ lm(lifeExp ~ year, data = .x))
# Pair each model with its own data set and pass both to predict().
preds <- map2(models, gapminder_countries, predict)
head(preds, 3)

gapminder_countries <- split(gapminder, gapminder$country)
models <- map(gapminder_countries, ~ lm(lifeExp ~ year, data = .x))
preds <- map2(models, gapminder_countries, predict)
head(preds, 3)

gapminder_countries <- split(gapminder, gapminder$country)
models <- map(gapminder_countries, ~ lm(lifeExp ~ year, data = .x))
preds <- map2(models, gapminder_countries, predict)
head(preds, 3)
## $Afghanistan## 1 2 3 4 5 6 ## 29.90729 31.28394 32.66058 34.03722 35.41387 36.79051 ## ## $Albania## 1 2 3 4 5 6 ## 59.22913 60.90254 62.57596 64.24938 65.92279 67.59621 ## ## $Algeria## 1 2 3 4 5 6 ## 43.37497 46.22137 49.06777 51.91417 54.76057 57.60697
input 1 | input 2 | returns |
---|---|---|
map() | map2() | list |
map_chr() | map2_chr() | character vector |
map_dbl() | map2_dbl() | double vector (numeric) |
map_int() | map2_int() | integer vector |
map_lgl() | map2_lgl() | logical vector |
map_dfc() | map2_dfc() | data frame (by column) |
map_dfr() | map2_dfr() | data frame (by row) |
i
i
input 1 | input 2 | input n | returns |
---|---|---|---|
map() | map2() | pmap() | list |
map_chr() | map2_chr() | pmap_chr() | character vector |
map_dbl() | map2_dbl() | pmap_dbl() | double vector (numeric) |
map_int() | map2_int() | pmap_int() | integer vector |
map_lgl() | map2_lgl() | pmap_lgl() | logical vector |
map_dfc() | map2_dfc() | pmap_dfc() | data frame (by column) |
map_dfr() | map2_dfr() | pmap_dfr() | data frame (by row) |
walk() | walk2() | pwalk() | input (side effects!) |
"folder/variable_name.png"
walk()
to save a plot for each of the variables
fs::dir_create("figures")

# Draw `variable` against year as one line per country, facetted by
# continent, and save the plot to figures/<variable>.png.
#
# variable: name of a gapminder column, given as a string.
ggsave_gapminder <- function(variable) {
  # we're using `aes_string()` so we don't need the curly-curly syntax
  p <- ggplot(
    gapminder,
    aes_string(x = "year", y = variable, color = "country")
  ) +
    geom_line() +
    scale_color_manual(values = country_colors) +
    # facet on the `continent` column (the original `continent.` does not exist)
    facet_wrap(vars(continent)) +
    theme(legend.position = "none")

  ggsave(
    filename = paste0("figures/", variable, ".png"),
    plot = p,
    dpi = 320
  )
}

# walk() calls the function once per variable name for its side effect.
vars <- c("lifeExp", "pop", "gdpPercap")
walk(vars, ggsave_gapminder)
base R | purrr |
---|---|
lapply() | map() |
vapply() | map_*() |
sapply() | ? |
x[] <- lapply() | map_dfc() |
mapply() | map2(), pmap() |
x <- rnorm(10)
# purrr version: a single call applies mean() to each element and collects
# the results in a list.
y <- map(x, mean)
x <- rnorm(10)
# Base R version: preallocate the result list, then fill it one index at a
# time.
y <- vector("list", length(x))
for (i in seq_along(x)) {
  y[[i]] <- mean(x[[i]])
}
# The map() call and the equivalent preallocate-and-fill for loop, shown
# side by side (repeated once per slide).
x <- rnorm(10)
y <- map(x, mean)

x <- rnorm(10)
y <- vector("list", length(x))
for (i in seq_along(x)) {
  y[[i]] <- mean(x[[i]])
}

x <- rnorm(10)
y <- map(x, mean)

x <- rnorm(10)
y <- vector("list", length(x))
for (i in seq_along(x)) {
  y[[i]] <- mean(x[[i]])
}

x <- rnorm(10)
y <- map(x, mean)

x <- rnorm(10)
y <- vector("list", length(x))
for (i in seq_along(x)) {
  y[[i]] <- mean(x[[i]])
}
Keyboard shortcuts
↑, ←, Pg Up, k | Go to previous slide |
↓, →, Pg Dn, Space, j | Go to next slide |
Home | Go to first slide |
End | Go to last slide |
Number + Return | Go to specific slide |
b / m / f | Toggle blackout / mirrored / fullscreen mode |
c | Clone slideshow |
p | Toggle presenter mode |
t | Restart the presentation timer |
?, h | Toggle this help |
Esc | Back to slideshow |