1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86
| library(plyr) library(data.table) library(pipeR)
N <- 2000 nt <- 100 p <- 4 dataDT <- data.table(subId = rep(1:(N/nt), p, each = nt), variable = rep(1:p, each = N), timePnt = rep(seq(0, 10, length.out = nt), p*N/nt), value = rnorm(N*p))
getRawCrCov1 <- function(demeanDataDT){ baseDT <- demeanDataDT[ , .(t1 = rep(timePnt, length(timePnt)), t2 = rep(timePnt, each=length(timePnt)), value.var1 = rep(value, length(timePnt))), by = .(variable, subId)] rawCrCovDT <- do.call("dlply", list(demeanDataDT, "variable", function(df){ merge(baseDT[variable >= df$variable[1]], df, suffixes = c("1", "2"), by.x = c("subId", "t2"), by.y = c("subId", "timePnt")) })) %>>% rbindlist %>>% setnames("value", "value.var2") %>>% `[`(j = .(sse = sum(value.var1 * value.var2), cnt = .N), by = .(variable1, variable2, t1, t2)) %>>% setorder(variable1, variable2, t1, t2) %>>% `[`(j = weight := 1) return(rawCrCovDT) }
getRawCrCov2 <- function(demeanDataDT){ baseDT <- demeanDataDT[ , .(t1 = rep(timePnt, length(timePnt)), t2 = rep(timePnt, each=length(timePnt)), value.var1 = rep(value, length(timePnt))), by = .(variable, subId)] rawCrCovDT <- do.call("ddply", list(demeanDataDT, "variable", function(df){ merge(baseDT[variable >= df$variable[1]], df, suffixes = c("1", "2"), by.x = c("subId", "t2"), by.y = c("subId", "timePnt")) })) %>>% setDT %>>% `[`(j = variable := NULL) %>>% setnames("value", "value.var2") %>>% `[`(j = .(sse = sum(value.var1 * value.var2), cnt = .N), by = .(variable1, variable2, t1, t2)) %>>% setorder(variable1, variable2, t1, t2) %>>% `[`(j = weight := 1) return(rawCrCovDT) }
getRawCrCov3 <- function(demeanDataDT){ baseDT <- demeanDataDT[ , .(t1 = rep(timePnt, length(timePnt)), t2 = rep(timePnt, each=length(timePnt)), value.var1 = rep(value, length(timePnt))), by = .(variable, subId)] rawCrCovDT <- do.call("llply", list(split(demeanDataDT, by = "variable"), function(df){ merge(baseDT[variable >= df$variable[1]], df, suffixes = c("1", "2"), by.x = c("subId", "t2"), by.y = c("subId", "timePnt")) })) %>>% rbindlist %>>% setnames("value", "value.var2") %>>% `[`(j = .(sse = sum(value.var1 * value.var2), cnt = .N), by = .(variable1, variable2, t1, t2)) %>>% setorder(variable1, variable2, t1, t2) %>>% `[`(j = weight := 1) return(rawCrCovDT) }
getRawCrCov4 <- function(demeanDataDT){ baseDT <- demeanDataDT[ , .(t1 = rep(timePnt, length(timePnt)), t2 = rep(timePnt, each=length(timePnt)), value.var1 = rep(value, length(timePnt))), by = .(variable, subId)] setkey(baseDT, "subId", "t2") setkey(demeanDataDT, "subId", "timePnt") rawCrCovDT <- do.call("llply", list(split(demeanDataDT, by = "variable"), function(df){ merge(baseDT[variable >= df$variable[1]], df, suffixes = c("1", "2"), by.x = c("subId", "t2"), by.y = c("subId", "timePnt")) })) %>>% rbindlist %>>% setnames("value", "value.var2") %>>% `[`(j = .(sse = sum(value.var1 * value.var2), cnt = .N), by = .(variable1, variable2, t1, t2)) %>>% setorder(variable1, variable2, t1, t2) %>>% `[`(j = weight := 1) return(rawCrCovDT) }
x1 <- getRawCrCov1(dataDT) x2 <- getRawCrCov2(dataDT) x3 <- getRawCrCov3(dataDT) x4 <- getRawCrCov4(dataDT) all.equal(x1, x2) all.equal(x1, x3) all.equal(x1, x4)
library(microbenchmark) microbenchmark(rbindlist = getRawCrCov1(dataDT), ddply = getRawCrCov2(dataDT), split.DT = getRawCrCov3(dataDT), setkey.first = getRawCrCov4(dataDT), times = 50L)
|