Section 41 Reshape Data: library(reshape2)
41.1 library(reshape2)
The reshape2 package provides a convenient set of functions for reshaping data.
Combining its melt and cast (dcast / acast) functions makes it easy to reshape
and aggregate data.
41.2 Usage reshape2
library(reshape2)
# Usage synopsis: the calls below list each function's arguments together with
# their default values. Names such as `data`, `formula` and `id.vars` are
# placeholders, so these lines are a reference, not code to run as-is.
# data.frame input: melt() and dcast()
melt(data, id.vars, measure.vars,
variable.name = "variable", ..., na.rm = FALSE, value.name = "value",
factorsAsStrings = TRUE)
dcast(data, formula, fun.aggregate = NULL, ..., margins = NULL,
subset = NULL, fill = NULL, drop = TRUE,
value.var = guess_value(data))
# matrix input: melt() and acast()
melt(data, varnames = names(dimnames(data)), ...,
na.rm = FALSE, as.is = FALSE, value.name = "value")
acast(data, formula, fun.aggregate = NULL, ..., margins = NULL,
subset = NULL, fill = NULL, drop = TRUE,
value.var = guess_value(data))
41.3 Example 1: One variable
library(reshape2)
# Simulate five subjects, each with a sex label and a height recorded at three
# time points. The seed is fixed so the random draws are reproducible.
set.seed(123)
ID <- paste0("S", seq_len(5))
Sex <- sample(c("M", "F"), length(ID), replace = TRUE)
# Ht1 is drawn without replacement; Ht2 and Ht3 are drawn with replacement.
Ht1 <- sample(45:55, length(ID), replace = FALSE)
Ht2 <- sample(60:70, length(ID), replace = TRUE)
Ht3 <- sample(75:85, length(ID), replace = TRUE)
# Wide layout: one row per subject, one column per height measurement.
DF <- data.frame(ID, Sex, Ht1, Ht2, Ht3)
# DF
# long format: stack the three height columns into a single Height column,
# with Time recording which column each value came from; ID and Sex are kept
# as identifier columns.
mDF <- melt(
  DF,
  id.vars = c("ID", "Sex"),
  measure.vars = paste0("Ht", 1:3),
  variable.name = "Time",
  value.name = "Height"
)
# wide format: spread the long table back out so that each level of Time
# becomes its own column, filled from Height, with one row per ID/Sex pair.
cDF <- dcast(mDF, ID + Sex ~ Time, value.var = "Height")
# aggregate data: when several rows fall into the same cell of the formula,
# fun.aggregate reduces them to a single value.

# Number of observations for each Sex at each time point.
dcast(data = mDF,
      formula = Sex ~ Time,
      fun.aggregate = length,
      value.var = "Height")

# Sum of heights for each Sex at each time point.
dcast(data = mDF,
      formula = Sex ~ Time,
      fun.aggregate = sum,
      value.var = "Height")

# Same counts with the layout transposed: Time in rows, Sex in columns.
dcast(data = mDF,
      formula = Time ~ Sex,
      fun.aggregate = length,
      value.var = "Height")
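# Output of dcast(mDF, Sex ~ Time, fun.aggregate = length, value.var = 'Height'):
Sex Ht1 Ht2 Ht3
1 F 1 1 1
2 M 4 4 4
# Output of dcast(mDF, Sex ~ Time, fun.aggregate = sum, value.var = 'Height'):
Sex Ht1 Ht2 Ht3
1 F 47 69 83
2 M 199 266 324
# Output of dcast(mDF, Time ~ Sex, fun.aggregate = length, value.var = 'Height'):
Time F M
1 Ht1 1 4
2 Ht2 1 4
3 Ht3 1 4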
# margins = TRUE appends an "(all)" row and column holding the aggregate
# computed over every Sex and every time point.

# Counts per Sex and time point, with marginal totals.
dcast(data = mDF,
      formula = Sex ~ Time,
      fun.aggregate = length,
      margins = TRUE,
      value.var = "Height")

# Mean height per Sex and time point.
dcast(data = mDF,
      formula = Sex ~ Time,
      fun.aggregate = mean,
      value.var = "Height")

# Mean height per Sex and time point, with marginal means.
dcast(data = mDF,
      formula = Sex ~ Time,
      fun.aggregate = mean,
      margins = TRUE,
      value.var = "Height")
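# Output of dcast(mDF, Sex ~ Time, fun.aggregate = length, margins = TRUE, value.var = 'Height'):
Sex Ht1 Ht2 Ht3 (all)
1 F 1 1 1 3
2 M 4 4 4 12
3 (all) 5 5 5 15
# Output of dcast(mDF, Sex ~ Time, fun.aggregate = mean, value.var = 'Height'):
Sex Ht1 Ht2 Ht3
1 F 47.00 69.0 83
2 M 49.75 66.5 81
# Output of dcast(mDF, Sex ~ Time, fun.aggregate = mean, margins = TRUE, value.var = 'Height'):
Sex Ht1 Ht2 Ht3 (all)
1 F 47.00 69.0 83.0 66.33333
2 M 49.75 66.5 81.0 65.75000
3 (all) 49.20 67.0 81.4 65.86667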
41.4 Example 2: Two variables
# Example 2 data: heights (Ht1-Ht3) and weights (Wt1-Wt3) for five subjects.
# The table is assembled from the vectors generated right here, so this
# example runs on its own instead of relying on a DF object left over from
# Example 1. With the same seed and the same order of sample() calls, the
# ID, Sex and Ht columns match those of Example 1.
set.seed(123)
ID <- paste0('S', 1:5)
Sex <- sample(x = c('M', 'F'), size = length(ID), replace = TRUE)
# Ht1 is drawn without replacement; all other measurements allow repeats.
Ht1 <- sample(45:55, size = length(ID), replace = FALSE)
Ht2 <- sample(60:70, size = length(ID), replace = TRUE)
Ht3 <- sample(75:85, size = length(ID), replace = TRUE)
Wt1 <- sample(3:6, size = length(ID), replace = TRUE)
Wt2 <- sample(8:12, size = length(ID), replace = TRUE)
Wt3 <- sample(13:18, size = length(ID), replace = TRUE)
# Wide layout: one row per subject, one column per measurement.
DF <- data.frame(ID = ID, Sex = Sex,
                 Ht1 = Ht1, Ht2 = Ht2, Ht3 = Ht3,
                 Wt1 = Wt1, Wt2 = Wt2, Wt3 = Wt3)
DF
# long format: stack all six measurement columns (three heights, three
# weights) into one Value column; Var records the source column, and ID and
# Sex are kept as identifier columns.
mDF <- melt(
  DF,
  id.vars = c("ID", "Sex"),
  measure.vars = c(paste0("Ht", 1:3), paste0("Wt", 1:3)),
  variable.name = "Var",
  value.name = "Value"
)
# wide format: spread the long table back out so that each level of Var
# becomes its own column, filled from Value, with one row per ID/Sex pair.
cDF <- dcast(mDF, ID + Sex ~ Var, value.var = "Value")
# aggregate data: reduce Value to one number per Sex (rows) and Var (columns)

# Number of observations in each cell.
dcast(mDF, Sex ~ Var, fun.aggregate = length, value.var = "Value")

# Number of observations in each cell, with margins added.
dcast(mDF, Sex ~ Var, fun.aggregate = length, margins = TRUE,
      value.var = "Value")

# Mean of Value in each cell.
dcast(mDF, Sex ~ Var, fun.aggregate = mean, value.var = "Value")

# Mean of Value in each cell, with margins added.
dcast(mDF, Sex ~ Var, fun.aggregate = mean, margins = TRUE,
      value.var = "Value")