Here are a few methods and their timings.
# Matrix-based approach: build a data frame with one column per name in
# `colVec`, filled from the same-named columns of `data`; every other column
# is left as NA.
#
# colVec: character vector of column names for the result.
# data:   data frame whose column names all occur in `colVec`.
#
# Returns a data frame whose columns have all been passed through
# as.numeric().
# NOTE(review): apply() returns a plain vector rather than a matrix when
# `data` has a single row, so the result is not one row per input row in
# that case.
createDF1 <- function(colVec, data) {
  # All-NA matrix with one named column per requested name.
  m <- matrix(
    NA,
    nrow = nrow(data),
    ncol = length(colVec),
    dimnames = list(NULL, colVec)
  )
  # Drop the supplied values into their same-named columns.
  values <- as.matrix(data)
  m[, names(data)] <- values
  # Column-wise numeric coercion, then conversion to a data frame.
  data.frame(apply(m, 2, as.numeric))
}
# List-based approach using rep(): build a data frame with one column per
# name in `colVec`, filled from the same-named columns of `data`; every
# other column is an all-NA integer column.
#
# colVec: character vector of column names for the result.
# data:   data frame; each of its column names is looked up in `colVec`
#         with match(), so every name is expected to occur there.
#
# Returns a data frame with nrow(data) rows and length(colVec) columns.
createDF2 <- function(colVec, data) {
  # One shared NA template column, repeated once per requested name.
  na_col <- rep(NA_integer_, nrow(data))
  rr <- setNames(rep(list(na_col), length(colVec)), nm = colVec)
  # Overwrite the placeholders at the positions of the supplied columns.
  rr[match(names(data), colVec)] <- data
  as.data.frame(rr)
}
# List-based approach using replicate(): build a data frame with one column
# per name in `colVec`, filled from the same-named columns of `data`; every
# other column is an all-NA integer column.
#
# colVec: character vector of column names for the result.
# data:   data frame; each of its column names is looked up in `colVec`
#         with match(), so every name is expected to occur there.
#
# Returns a data frame with nrow(data) rows and length(colVec) columns.
createDF3 <- function(colVec, data) {
  rr <- setNames(
    replicate(length(colVec), list(rep(NA_integer_, nrow(data)))),
    nm = colVec
  )
  # Use the names of the `data` argument (not a global object) to locate
  # the columns to overwrite.
  rr[match(names(data), colVec)] <- data
  as.data.frame(rr)
}
Create a 3,000,000 x 3 data frame to test on:
# Target column names: the first 21 lowercase letters.
columns <- head(letters, 21L)
# Input data: three identical integer columns of 3,000,000 rows each.
d <- data.frame(
  g = seq_len(3e6L),
  s = seq_len(3e6L),
  j = seq_len(3e6L)
)
Run some tests:
# Time a single run of each approach on the same inputs; the commented
# lines are the recorded output of each call.
system.time(createDF1(columns, d))
# user system elapsed
# 5.022 1.023 6.054
system.time(createDF2(columns, d))
# user system elapsed
# 0.007 0.004 0.011
system.time(createDF3(columns, d))
# user system elapsed
# 0.105 0.077 0.183
Of these three, createDF2 is clearly the fastest: build the list of placeholder columns with rep(list(rep(NA_integer_, nrow(data))), length(colVec)), then replace the matching columns with the values from data.