The apply family of functions

Kevin Keenan

Belfast R Users

06/06/13

What is a loop?

'for' loops


# Build the vector to iterate over: 10, 20, ..., 100
x <- seq(10, 100, by = 10)

# A for loop visits every element of x in turn; here each one is printed
for (i in x) {
  print(i)
}
[1] 10
[1] 20
[1] 30
[1] 40
[1] 50
[1] 60
[1] 70
[1] 80
[1] 90
[1] 100

While loops (Beware the infinite loop)


# Counter starts at 10
x <- 10

# A while loop re-checks its condition before every pass. The body must move
# x towards the limit, otherwise the condition never becomes FALSE and the
# loop runs forever.
while (x <= 100) {
  print(x)
  x <- x + 10
}
[1] 10
[1] 20
[1] 30
[1] 40
[1] 50
[1] 60
[1] 70
[1] 80
[1] 90
[1] 100

repeat loops (Beware the infinite loop)

# Counter starts at 10
x <- 10

# repeat has no condition of its own: the only way out is an explicit break,
# so the exit test comes first and the work follows it
repeat {
  if (x > 100) {
    break
  }
  print(x)
  x <- x + 10
}
[1] 10
[1] 20
[1] 30
[1] 40
[1] 50
[1] 60
[1] 70
[1] 80
[1] 90
[1] 100

The apply family

Some examples

apply

# Fill a 20 x 5 matrix with 100 random draws from the standard normal
x <- matrix(rnorm(100), nrow = 20, ncol = 5)

# Look at the first six rows
head(x, n = 6L)
        [,1]     [,2]    [,3]     [,4]     [,5]
[1,] -0.7810 -0.86850 -0.8397 -0.08977  0.02139
[2,] -0.6308  0.04455 -1.0886  1.71665 -1.01455
[3,]  0.1558 -1.01081  0.6283  1.13896 -0.82688
[4,] -0.2202  2.87770 -1.1257  1.91574 -0.40827
[5,] -1.5348  0.01791 -1.1674  0.80576 -0.31821
[6,]  1.0471 -2.50138  1.0056 -1.66040 -1.95442
# Standard deviation of every column of the matrix (MARGIN = 2 selects
# columns)
sds <- apply(X = x, MARGIN = 2, FUN = sd)

# Show the resulting sds
sds
[1] 0.9268 1.3391 1.0247 0.9492 1.0228

sapply

# Build a list of 5 numeric vectors, each holding 20 standard-normal draws.
# replicate(..., simplify = FALSE) returns the list directly, so nothing has
# to be grown element by element inside a loop.
x <- replicate(5, rnorm(20), simplify = FALSE)

# Standard deviation of every vector; sapply() simplifies the result to a
# numeric vector with one entry per list element
sds <- sapply(x, sd)

# print sds
sds
[1] 1.0999 0.9354 1.0922 0.6278 0.8309

lapply

# Build a list of 5 numeric vectors, each holding 20 standard-normal draws.
# replicate(..., simplify = FALSE) returns the list directly, so nothing has
# to be grown element by element inside a loop.
x <- replicate(5, rnorm(20), simplify = FALSE)

# Standard deviation of every vector; lapply() always returns a list, with
# one element per input element
sds <- lapply(x, sd)

# print sds
sds
[[1]]
[1] 0.8884

[[2]]
[1] 0.9568

[[3]]
[1] 1.33

[[4]]
[1] 1.132

[[5]]
[1] 1.034

tapply

# Simulate 50 people: a randomly assigned gender label ("m" or "f") and a
# height drawn from a normal distribution centred on 160
x <- data.frame(
  gender = sample(c("m", "f"), size = 50, replace = TRUE),
  height = rnorm(50, mean = 160)
)

# Look at the first six rows
head(x, n = 6L)
  gender height
1      f  161.7
2      m  162.1
3      f  161.3
4      m  160.2
5      m  157.1
6      m  159.5
# Mean height within each gender group: tapply() splits the heights by the
# grouping vector and applies mean() to every group
mn_height <- tapply(X = x[["height"]], INDEX = x[["gender"]], FUN = mean)

# Show the per-group means
mn_height
    f     m 
160.5 160.0 

Why apply functions?

How to scale R to parallel systems

Packages required

# Attaching doParallel also attaches the packages it depends on: foreach,
# iterators and parallel
library(doParallel)
Loading required package: foreach
Loading required package: iterators
Loading required package: parallel

Detect system cores

# Ask the parallel package how many CPU cores this machine reports
ncores <- detectCores()

# detectCores() returns NA when the count cannot be determined; fall back to
# a single core so that the cluster set-up that follows still gets a valid
# size
if (is.na(ncores)) {
  ncores <- 1L
}

ncores
[1] 4

Set up a multi-core cluster

# Create a cluster object of size ncores
cl <- makeCluster(spec = ncores)

# Make this cluster the parallel backend used by the foreach package
# NOTE(review): the workers should be released with stopCluster(cl) once the
# parallel work is finished; that call is not shown in this section
registerDoParallel(cl = cl)

Now we are ready to go

General uses

# Template call, not runnable as written: FUN is a placeholder for the
# function to apply to each element of x, and cl is the cluster object
# created above. The call has the same shape as sapply(x, FUN), with the
# cluster passed as an extra first argument.
results <- parSapply(cl, x, FUN)

Examples