Sunday, March 15, 2015

function which runs two sample t-test on grouped data

1 function which runs two sample t-test on grouped data frame

  • the function takes a data frame as its first argument
  • the second argument group is the name of the column which should be used for splitting the data frame
  • col indicates the numeric column to pass through to the t.test() function
  • indcol should be a binary variable, which is also passed through to t.test()


## Run a two-sample t-test separately within every group of a data frame.
##
## Arguments:
##   df     - data frame containing the three columns named below
##   group  - name of the column used to split df into groups
##   col    - name of the numeric column passed to t.test() as the response
##   indcol - name of the binary column passed to t.test() as the right-hand
##            side of the formula
##
## Value:
##   A data frame with one row per group that actually occurs in df[[group]].
##   Columns: a "<col> by <indcol>" label, the group value, the two estimated
##   means (named after the levels of indcol via make.names()), the test
##   statistic, the confidence interval bounds, the p-value, the alternative
##   and the method string reported by t.test(). Row names are the group
##   values. NULL is returned when there are no groups at all.
df.t.test <- function(df,group,col,indcol){
    ## Summarise the t-test for one group; x is that group's slice of df.
    t.test.helper <- function(x,col,indcol,group){
        tob <- t.test(x[[col]] ~ x[[indcol]])
        ## every row of x carries the same group value, so take the first
        grp <- x[[group]][1]
        tmp <- data.frame(data = paste(col,"by",indcol),
                          group = grp,
                          mean.group.1 = tob$estimate[1],
                          mean.group.2 = tob$estimate[2],
                          name.test.stat = tob$statistic,
                          conf.lower = tob$conf.int[1],
                          conf.upper = tob$conf.int[2],
                          pval = tob$p.value,
                          alternative = tob$alternative,
                          tob.method = tob$method)
        ## replace the placeholder names with the ones t.test() reports,
        ## e.g. "mean in group a" becomes mean.in.group.a
        names(tmp)[3:4] <- make.names(names(tob$estimate))
        row.names(tmp) <- grp
        tmp
    }
    ## drop=TRUE: skip factor levels of the group column that have no rows;
    ## otherwise an empty slice reaches t.test() and the whole call fails
    df.l <- split(df[,c(col,indcol,group)],df[[group]],drop=TRUE)
    res.l <- lapply(df.l,t.test.helper,col=col,indcol=indcol,group=group)
    ## bind all rows in a single call; unname() so the row names set in the
    ## helper are used rather than names derived from the list
    do.call(rbind,unname(res.l))
}


## example data: 10 groups of 100 rows each; values and t.group are drawn
## at random, so the numbers in the result differ from run to run
examp.data <- data.frame(group=gl(10,100),
                         values=rnorm(1000),
                         t.group=sample(letters[1:2],1000,replace=TRUE))
## example: one t-test of values by t.group for every level of group
df.t.test(examp.data,"group","values","t.group")

data group mean.in.group.a mean.in.group.b name.test.stat
1  values by t.group     1      0.06958824      0.02803721      0.2244456
2  values by t.group     2     -0.20944827     -0.06033410     -0.8368429
3  values by t.group     3     -0.20387479      0.07940850     -1.3245172
4  values by t.group     4      0.11406709      0.01975937      0.4220244
5  values by t.group     5      0.09060241     -0.10442099      1.0620544
6  values by t.group     6     -0.05623630     -0.07537593      0.1056388
7  values by t.group     7     -0.26081841     -0.02721652     -0.9533887
8  values by t.group     8     -0.04723535     -0.17205804      0.5662930
9  values by t.group     9      0.08185406      0.01676488      0.3033993
10 values by t.group    10     -0.41406196     -0.02193303     -2.1353113
   conf.lower  conf.upper       pval alternative              tob.method
1  -0.3263444  0.40944644 0.82293023   two.sided Welch Two Sample t-test
2  -0.5030591  0.20483073 0.40487276   two.sided Welch Two Sample t-test
3  -0.7083319  0.14176535 0.18876884   two.sided Welch Two Sample t-test
4  -0.3491642  0.53777963 0.67393371   two.sided Welch Two Sample t-test
5  -0.1693819  0.55942873 0.29082158   two.sided Welch Two Sample t-test
6  -0.3404432  0.37872241 0.91608663   two.sided Welch Two Sample t-test
7  -0.7200269  0.25282312 0.34281018   two.sided Welch Two Sample t-test
8  -0.3126875  0.56233286 0.57251104   two.sided Welch Two Sample t-test
9  -0.3606510  0.49082936 0.76222949   two.sided Welch Two Sample t-test
10 -0.7566487 -0.02760917 0.03527904   two.sided Welch Two Sample t-test