Section 41 Reshape Data: library(reshape2)

41.1 library(reshape2)

The reshape2 package provides a convenient set of functions for reshaping data.

Combining the melt and cast functions makes it easy to reshape and aggregate data.

41.2 Usage reshape2

library(reshape2)

# data.frame
# Function signatures for reference only (not runnable as written).
# melt() stacks the measure.vars columns into a variable/value pair of columns
# (long format); dcast() casts long data back out according to `formula`,
# optionally summarising with fun.aggregate.

melt(data, id.vars, measure.vars,
  variable.name = "variable", ..., na.rm = FALSE, value.name = "value",
  factorsAsStrings = TRUE)

dcast(data, formula, fun.aggregate = NULL, ..., margins = NULL,
  subset = NULL, fill = NULL, drop = TRUE,
  value.var = guess_value(data))


# matrix
# Counterparts for matrix data: this melt() signature takes the dimension
# names via `varnames`; acast() has the same arguments as dcast().
# NOTE(review): acast() presumably returns a matrix/array rather than a
# data.frame -- confirm against the reshape2 reference.

melt(data, varnames = names(dimnames(data)), ...,
  na.rm = FALSE, as.is = FALSE, value.name = "value")

acast(data, formula, fun.aggregate = NULL, ..., margins = NULL,
  subset = NULL, fill = NULL, drop = TRUE,
  value.var = guess_value(data))


41.3 Example 1: One variable

library(reshape2)

# Simulate a small wide-format data set: five subjects, their sex, and one
# height column per time point (Ht1, Ht2, Ht3). The seed makes the draws
# reproducible; the sample() calls must stay in this order to reproduce them.
set.seed(123)

ID <- paste0("S", seq_len(5))
Sex <- sample(c("M", "F"), length(ID), replace = TRUE)
Ht1 <- sample(45:55, length(ID))  # replace = FALSE is the default
Ht2 <- sample(60:70, length(ID), replace = TRUE)
Ht3 <- sample(75:85, length(ID), replace = TRUE)

# Unnamed arguments take their column names from the variable names.
DF <- data.frame(ID, Sex, Ht1, Ht2, Ht3)

# DF


# Long format: keep ID and Sex as identifiers and stack the three height
# columns into a "Time" label column and a "Height" value column.
mDF <- melt(
  DF,
  id.vars = c("ID", "Sex"),
  measure.vars = paste0("Ht", 1:3),
  value.name = "Height",
  variable.name = "Time"
)

# Wide format: cast the long data back to one column per Time level,
# with one row per ID/Sex combination.
cDF <- dcast(mDF, ID + Sex ~ Time, value.var = "Height")


# aggregate data

# Count the Height observations for each Sex (rows) at each Time (columns).
dcast(mDF, Sex ~ Time, value.var = "Height", fun.aggregate = length)
  Sex Ht1 Ht2 Ht3
1   F   1   1   1
2   M   4   4   4
# Total Height for each Sex (rows) at each Time (columns).
dcast(mDF, Sex ~ Time, value.var = "Height", fun.aggregate = sum)
  Sex Ht1 Ht2 Ht3
1   F  47  69  83
2   M 199 266 324
# Same counts with the formula flipped: Time in rows, Sex in columns.
dcast(mDF, Time ~ Sex, value.var = "Height", fun.aggregate = length)
  Time F M
1  Ht1 1 4
2  Ht2 1 4
3  Ht3 1 4
# Counts per Sex and Time; margins = TRUE appends an "(all)" row and column
# holding the aggregate over every level.
dcast(
  mDF,
  Sex ~ Time,
  value.var = "Height",
  fun.aggregate = length,
  margins = TRUE
)
    Sex Ht1 Ht2 Ht3 (all)
1     F   1   1   1     3
2     M   4   4   4    12
3 (all)   5   5   5    15
# Mean Height for each Sex (rows) at each Time (columns).
dcast(mDF, Sex ~ Time, value.var = "Height", fun.aggregate = mean)
  Sex   Ht1  Ht2 Ht3
1   F 47.00 69.0  83
2   M 49.75 66.5  81
# Mean Height per Sex and Time, plus "(all)" margins giving the mean over
# every Time (last column) and every Sex (last row).
dcast(
  mDF,
  Sex ~ Time,
  value.var = "Height",
  fun.aggregate = mean,
  margins = TRUE
)
    Sex   Ht1  Ht2  Ht3    (all)
1     F 47.00 69.0 83.0 66.33333
2     M 49.75 66.5 81.0 65.75000
3 (all) 49.20 67.0 81.4 65.86667


41.3.1 Data


41.3.2 Data: Long format


41.3.3 Data: Wide format


41.4 Example 2: Two variables

# Simulate the Example 2 data: five subjects, their sex, three Ht* columns and
# three Wt* columns (presumably height and weight at three time points).
# The seed and the order of the sample() calls fix the simulated values.
set.seed(123)

ID <- paste0("S", 1:5)
Sex <- sample(x = c("M", "F"), size = length(ID), replace = TRUE)
Ht1 <- sample(45:55, size = length(ID), replace = FALSE)
Ht2 <- sample(60:70, size = length(ID), replace = TRUE)
Ht3 <- sample(75:85, size = length(ID), replace = TRUE)

Wt1 <- sample(3:6, size = length(ID), replace = TRUE)
Wt2 <- sample(8:12, size = length(ID), replace = TRUE)
Wt3 <- sample(13:18, size = length(ID), replace = TRUE)

# Build the data frame from the vectors generated above so this example runs
# on its own instead of extending a DF left over from an earlier section.
DF <- data.frame(
  ID = ID, Sex = Sex,
  Ht1 = Ht1, Ht2 = Ht2, Ht3 = Ht3,
  Wt1 = Wt1, Wt2 = Wt2, Wt3 = Wt3
)

DF


# Long format: keep ID and Sex as identifiers and stack all six measurement
# columns into a "Var" label column and a "Value" value column.
mDF <- melt(
  DF,
  id.vars = c("ID", "Sex"),
  measure.vars = c(paste0("Ht", 1:3), paste0("Wt", 1:3)),
  value.name = "Value",
  variable.name = "Var"
)


# Wide format: cast the long data back to one column per Var level,
# with one row per ID/Sex combination.
cDF <- dcast(mDF, ID + Sex ~ Var, value.var = "Value")


# Aggregate the long data: Sex in rows, one column per Var level.


# Number of observations per Sex and Var.
dcast(mDF, Sex ~ Var, value.var = "Value", fun.aggregate = length)


# Same counts with margins = TRUE added.
dcast(
  mDF,
  Sex ~ Var,
  value.var = "Value",
  fun.aggregate = length,
  margins = TRUE
)


# Mean Value per Sex and Var.
dcast(mDF, Sex ~ Var, value.var = "Value", fun.aggregate = mean)


# Same means with margins = TRUE added.
dcast(
  mDF,
  Sex ~ Var,
  value.var = "Value",
  fun.aggregate = mean,
  margins = TRUE
)


41.4.1 Data


41.4.2 Data: Long format


41.4.3 Data: Wide format


41.5 library(tidyr)

library(tidyr)
library(dplyr)

# long format
# pivot_longer() replaces the superseded gather(): columns Ht1 through Wt3 are
# stacked into a character "Var" column (their names) and a "Value" column.
# Unlike gather(), the rows come out grouped by input row (subject) rather
# than by variable.

mDF <- DF %>%
  pivot_longer(cols = Ht1:Wt3, names_to = "Var", values_to = "Value")


# wide format
# pivot_wider() replaces the superseded spread(): one column per Var level,
# identified by the remaining columns (ID and Sex).

cDF <- mDF %>%
  pivot_wider(names_from = Var, values_from = Value)