Feb., 2021

Profiling Memory: Rprof

Rprof has tools for profiling memory:

# Monte Carlo estimate of pi that records the running estimate after each draw.
#
# Draws `l` points uniformly in the square [-1, 1] x [-1, 1], counts how many
# fall inside the unit circle, and stores 4 * hits / i after the i-th draw.
#
# Arguments:
#   l        number of random points to draw (0 yields an empty result)
#   list_len length of a throw-away list allocated before the loop; it does
#            not influence the result and defaults to the original 10000000
# Returns:
#   numeric vector of length `l` holding the successive estimates of pi
siml <- function(l, list_len = 10000000) {
  estimates <- numeric(l)
  hits <- 0
  # Never read afterwards: kept so that a large as.list() allocation is
  # visible when this function is memory-profiled.
  listp <- as.list(seq_len(list_len))
  # Euclidean distance of the 2-d point `x` from the origin.
  pow2 <- function(x) {
    sqrt(x[1] * x[1] + x[2] * x[2])
  }
  # seq_len() instead of 1:l, so that l == 0 performs no iterations at all.
  for (i in seq_len(l)) {
    x <- runif(2, -1, 1)
    if (pow2(x) <= 1) {
      hits <- hits + 1
    }
    # Fraction of points inside the circle approximates pi / 4.
    estimates[i] <- hits / i * 4
  }
  estimates
}

Profiling Memory: Rprof

size <- 1000000
Rprof("Rprof-mem.out", memory.profiling=TRUE)
res <- siml(size)
Rprof(NULL)

Profiling Memory: Rprof

summaryRprof("Rprof-mem.out", memory="both")
## $by.self
##                   self.time self.pct total.time total.pct mem.total
## "runif"                2.24    41.18       2.24     41.18    1703.2
## "as.list.default"      1.46    26.84       1.46     26.84     680.0
## "siml"                 1.10    20.22       5.44    100.00    3917.4
## "pow2"                 0.60    11.03       0.60     11.03     576.3
## "findCenvVar"          0.02     0.37       0.04      0.74       2.4
## "%in%"                 0.02     0.37       0.02      0.37       2.4
## 
## $by.total
##                          total.time total.pct mem.total self.time self.pct
## "siml"                         5.44    100.00    3917.4      1.10    20.22
## "block_exec"                   5.44    100.00    3917.4      0.00     0.00
## "call_block"                   5.44    100.00    3917.4      0.00     0.00
## "eval"                         5.44    100.00    3917.4      0.00     0.00
## "evaluate"                     5.44    100.00    3917.4      0.00     0.00
## "evaluate::evaluate"           5.44    100.00    3917.4      0.00     0.00
## "evaluate_call"                5.44    100.00    3917.4      0.00     0.00
## "FUN"                          5.44    100.00    3917.4      0.00     0.00
## "generator$render"             5.44    100.00    3917.4      0.00     0.00
## "handle"                       5.44    100.00    3917.4      0.00     0.00
## "in_dir"                       5.44    100.00    3917.4      0.00     0.00
## "knitr::knit"                  5.44    100.00    3917.4      0.00     0.00
## "lapply"                       5.44    100.00    3917.4      0.00     0.00
## "process_file"                 5.44    100.00    3917.4      0.00     0.00
## "process_group"                5.44    100.00    3917.4      0.00     0.00
## "process_group.block"          5.44    100.00    3917.4      0.00     0.00
## "render"                       5.44    100.00    3917.4      0.00     0.00
## "render_one"                   5.44    100.00    3917.4      0.00     0.00
## "rmarkdown::render"            5.44    100.00    3917.4      0.00     0.00
## "rmarkdown::render_site"       5.44    100.00    3917.4      0.00     0.00
## "sapply"                       5.44    100.00    3917.4      0.00     0.00
## "suppressMessages"             5.44    100.00    3917.4      0.00     0.00
## "timing_fn"                    5.44    100.00    3917.4      0.00     0.00
## "withCallingHandlers"          5.44    100.00    3917.4      0.00     0.00
## "withVisible"                  5.44    100.00    3917.4      0.00     0.00
## "runif"                        2.24     41.18    1703.2      2.24    41.18
## "as.list.default"              1.46     26.84     680.0      1.46    26.84
## "as.list"                      1.46     26.84     680.0      0.00     0.00
## "pow2"                         0.60     11.03     576.3      0.60    11.03
## "findCenvVar"                  0.04      0.74       2.4      0.02     0.37
## "cmp"                          0.04      0.74       2.4      0.00     0.00
## "cmpCall"                      0.04      0.74       2.4      0.00     0.00
## "cmpfun"                       0.04      0.74       2.4      0.00     0.00
## "compiler:::tryCmpfun"         0.04      0.74       2.4      0.00     0.00
## "doTryCatch"                   0.04      0.74       2.4      0.00     0.00
## "genCode"                      0.04      0.74       2.4      0.00     0.00
## "getInlineInfo"                0.04      0.74       2.4      0.00     0.00
## "h"                            0.04      0.74       2.4      0.00     0.00
## "tryCatch"                     0.04      0.74       2.4      0.00     0.00
## "tryCatchList"                 0.04      0.74       2.4      0.00     0.00
## "tryCatchOne"                  0.04      0.74       2.4      0.00     0.00
## "tryInline"                    0.04      0.74       2.4      0.00     0.00
## "%in%"                         0.02      0.37       2.4      0.02     0.37
## "cmpForBody"                   0.02      0.37       2.4      0.00     0.00
## "cmpPrim1"                     0.02      0.37       0.0      0.00     0.00
## "cmpPrim2"                     0.02      0.37       0.0      0.00     0.00
## "cmpSymbolAssign"              0.02      0.37       0.0      0.00     0.00
## "constantFold"                 0.02      0.37       2.4      0.00     0.00
## "constantFoldCall"             0.02      0.37       2.4      0.00     0.00
## "getFoldFun"                   0.02      0.37       2.4      0.00     0.00
## "isBaseVar"                    0.02      0.37       2.4      0.00     0.00
## 
## $sample.interval
## [1] 0.02
## 
## $sampling.time
## [1] 5.44

Notice that the memory usage reported (mem.total) is the accumulated memory allocated over the whole run, not the memory in use at any given moment.

Profiling Memory: gc

A better approach is to use the gc() function, which reports the memory usage at a specific point in time. Memory information while a piece of code is running can be reported by switching on gcinfo():

size <- 1000000
gc()
#          used (Mb) gc trigger (Mb) max used (Mb)
#Ncells  551156 29.5    1222600 65.3  1067006 57.0
#Vcells 1413536 10.8    8388608 64.0  1745827 13.4

gcinfo(TRUE)   #checking the memory usage during function execution
res <- siml(size)
#... omitted lines
#Garbage collection 59 = 49+3+7 (level 0) ... 
#563.6 Mbytes of cons cells used (79%)
#171.1 Mbytes of vectors used (76%)
gcinfo(FALSE)

Profiling Memory: gc

A better approach is to use the gc() function, which reports the memory usage at a specific point in time. Memory information while a piece of code is running can be reported by switching on gcinfo():

Finally, a call to gc() will report the memory allocated for the outputs of the function res <- siml():

gc()
#          used (Mb) gc trigger  (Mb) max used  (Mb)
#Ncells  558900 29.9   10696839 571.3 11155454 595.8
#Vcells 2429818 18.6   23726023 181.1 29657289 226.3

Profiling Memory: gc

gc() can help us to find memory usage changes upon creating objects:

gc(reset=TRUE)
#          used (Mb) gc trigger (Mb) max used (Mb)
#Ncells  562188 30.1    1154511 61.7   562188 30.1
#Vcells 1425756 10.9    8388608 64.0  1425756 10.9

listp <- as.list(seq(10000000))
gc()
#           used  (Mb) gc trigger   (Mb) max used  (Mb)
#Ncells 10564701 564.3   22068058 1178.6 10567700 564.4
#Vcells 21431560 163.6   33209716  253.4 21441849 163.6

Profiling Memory: gc

rm(listp)
gc()
#          used (Mb) gc trigger  (Mb) max used  (Mb)
#Ncells  564859 30.2   17654447 942.9 10567700 564.4
#Vcells 1431905 11.0   26567773 202.7 21441849 163.6

gc(reset=TRUE)
#          used (Mb) gc trigger  (Mb) max used (Mb)
#Ncells  564869 30.2   14123558 754.3   564869 30.2
#Vcells 1431935 11.0   21254219 162.2  1431935 11.0   

Profiling Memory: Pryr

Another way to monitor the memory size of objects is the pryr package, which provides the function object_size() for this purpose.

library(pryr)

R allocates memory in a heuristic manner. To see this, let us use the object_size() function to monitor how much memory an object requests as it grows:

Profiling Memory: Pryr

sizes <- sapply(0:50, function(n) object_size(seq_len(n)))
plot(0:50, sizes, xlab = "Length", ylab = "Size (bytes)", 
     type = "s")

Profiling Memory: Pryr

Another feature of R is that it tries to save memory by using pointers to existing memory allocations:

x <- 1:1e6
object_size(x)
## 4 MB
y <- list(x, x, x)
object_size(y)
## 4 MB

Profiling Memory: Pryr

object_size(x, y)
## 4 MB

After modifying one element of the list we get a different value:

y[[1]] <- as.integer(x+1-1)
object_size(y)
## 8 MB

Profiling Memory: Pryr

The mem_change() function helps you to figure out the change in size upon creating an object:

# Takes no arguments and returns the constant 10; the matrix built inside is
# not part of the return value.
myf <- function() {
  # Build a 5000 x 5000 matrix filled with 1.0 under mem_change(); the value
  # mem_change() returns is discarded here.
  # NOTE(review): A is presumably local to this call and released on return,
  # which would match the small change reported for the outer call - confirm.
  mem_change( A <- matrix(1.0, 5000, 5000) )
  10
}
mem_change( z <- myf() )
# 1 kB

mem_change( A <- matrix(1.0, 5000, 5000) )
# 200 MB

Profiling Memory: Lineprof

The lineprof package can be installed with:

install.packages("devtools")
devtools::install_github("hadley/lineprof")

Profiling Memory: Lineprof

library(lineprof)
# Monte Carlo estimate of pi that records the running estimate after each draw.
#
# Draws `l` points uniformly in the square [-1, 1] x [-1, 1], counts how many
# fall inside the unit circle, and stores 4 * hits / i after the i-th draw.
#
# Arguments:
#   l        number of random points to draw (0 yields an empty result)
#   list_len length of a throw-away list allocated before the loop; it does
#            not influence the result and defaults to the original 10000000
# Returns:
#   numeric vector of length `l` holding the successive estimates of pi
siml <- function(l, list_len = 10000000) {
  estimates <- numeric(l)
  hits <- 0
  # Never read afterwards: kept so that a large as.list() allocation is
  # visible when this function is profiled line by line.
  listp <- as.list(seq_len(list_len))
  # Euclidean distance of the 2-d point `x` from the origin.
  pow2 <- function(x) {
    sqrt(x[1] * x[1] + x[2] * x[2])
  }
  # seq_len() instead of 1:l, so that l == 0 performs no iterations at all.
  for (i in seq_len(l)) {
    x <- runif(2, -1, 1)
    if (pow2(x) <= 1) {
      hits <- hits + 1
    }
    # Fraction of points inside the circle approximates pi / 4.
    estimates[i] <- hits / i * 4
  }

  estimates
}

Profiling Memory: Lineprof

prof <- lineprof(siml(1000000))
prof

#     time   alloc release dups                                   ref
#1   0.009   5.624   0.000 1808 c("compiler:::tryCmpfun", "tryCatch")
#2   0.002   0.100   0.000  235                          character(0)
#3   0.520 556.091   5.220    2       c("as.list", "as.list.default")
#4   0.001   5.332   0.000    0                               "runif"
#5   0.001   5.253   0.000    0                          character(0)
#6   0.001   5.197   0.000    0                                    #6
#7   0.009  17.406  16.858    0                               "runif"
#8   0.001   8.230   0.000    0                          character(0)
#...  

Dealing with big arrays

For big data files we can use memory-mapped files with the bigmemory package. If the bigmemory package is not installed, first execute the install.packages() command:

install.packages("bigmemory")
library(bigmemory)
bm <- big.matrix(1e8, 3, backingfile = "bm", backingpath = getwd())
bm

The large array can be retrieved for subsequent use as follows:

my.bm <- attach.big.matrix(file.path(getwd(), "bm.desc"))

Dealing with big arrays

Now, work with chunks of \(10^7\) rows:

# Fill the three columns of `bm` with random data, one block of rows at a
# time, so that only `chunksize` rows' worth of random values are generated
# per iteration.
chunksize <- 1e7           # maximum number of rows written per iteration
total_rows <- nrow(bm)     # loop-invariant, so looked up once
start <- 1
while (start <= total_rows) {
  # The last block may be shorter than `chunksize`.
  end <- min(start + chunksize - 1, total_rows)
  # Rows in this block; kept separate so `chunksize` itself is never altered.
  n_chunk <- end - start + 1
  bm[start:end, 1] <- rpois(n_chunk, 1e3)
  bm[start:end, 2] <- sample(0:1, n_chunk, replace = TRUE, prob = c(0.7, 0.3))
  bm[start:end, 3] <- runif(n_chunk, 0, 1e5)
  start <- end + 1
}

Summary

  • We studied some methods to monitor memory usage: Rprof and gc

  • One can also use the pryr and lineprof packages to have more detailed information, for instance memory changes upon creating objects and data duplication

  • To save memory one can use the bigmemory package, which works with file-backed (memory-mapped) matrices instead of holding the actual data in RAM

References