# BIMM2012 - HARDMAN DATA # TWO-STAGE BOOTSTRAP PROCEDURE # Hugo Quené, 17 Feb 2012 # # for description see Nooteboom & Quené 2008 JML # initialization # nr of iterations should be at least 250, preferably about 1000 niter <- 3 # number of bootstrap iterations, for demo only # put subject id's in sorted list items <- sort(unique(hardman$KeyWord)) nitems <- length(items) set.seed(17) # random number seed, for reproduction # define function resample, safe, with replacement, for bootstrap # after example in help(sample) resample <- function(x, size, ...) if(length(x) <= 1) { if(!missing(size) && size == 0) x[FALSE] else x } else sample(x, size, replace=T, ...) # 9 random coefs results.coef <- matrix(NA,nrow=niter,ncol=9) btime <- proc.time() # begin.time # loop for bootstrap iterations starts here for (iter in 1:niter) { # current sample, with replacement, of subjects ## modified 20060510 # stage one: perform bootstrap over ITEMS these.words <- sort(items[resample(1:nitems,size=nitems-1)]) # Eq.6.29, p.248 # construct data set, containing all data from words sampled in these.words. # the number of observations per word varies among words. bootdata <- matrix( NA,ncol=12 ) dimnames(bootdata)[[2]] <- dimnames(hardman)[[2]] # because rbind wants dimnames to match for (wi in 1:nitems-1) { # take rows of corresponding item id, and rbind these to temp matrix called bootdata bootdata <- rbind( bootdata, as.vector( hardman[hardman$KeyWord==these.words[wi],]) ) # cat("data from item ",this.subj," added to bootdata \n") # because the number of obs may vary among words, dim(bootdata)!=dim(hardman) } # stage two: perform bootstrap over bootdata nrows <- dim(bootdata)[1] ## added 20060601 # remove first line of NA's from bootdata, before resampling bootdata <- bootdata[2:nrows,] nrows <- nrows-1 these.rows <- sort(resample(1:nrows,size=nrows)) # stage two resampling bootdata.rs <- bootdata[these.rows,] # perform GLMM and keep random estimates m04 <- glmer( KeyWordResponse ~ (1 + I(Trial - 30.5) | Listener) + (1 + I(Trial - 30.5) | talker) + (1 + I(Trial - 30.5) | KeyWord) + lisL1 + talkL1 + lisL1:talkL1 , data=bootdata.rs, family="binomial" ) results.coef[iter,] <- unlist(summary(m04)@ST)[c(1,4,2,5,8,6,9,12,10)] # sd and cor units # for each of KeyWord, Listener, Talker: # intercept sd, slope sd, correlation tooktime <- proc.time()-btime cat(iter,"two-stage bootstrap iterations, with GLMM, took approx",tooktime[3],"sec CPU (",tooktime[3]/iter,"per iter) \n") } # loop for bootstrap iteration ends here dimnames(results.coef)[[1]] <- paste("sim_",1:niter,sep="") aux <- summary(m04)@REmat dimnames(results.coef)[[2]][c(1,2,4,5,7,8)] <- paste( aux[1:6,2],"|",aux[1:6,1], sep="" ) dimnames(results.coef)[[2]][c(3,6,9)] <- paste( "cor|",aux[c(1,3,5),1], sep="" ) # show results results.coef # cleanup remove( m04,aux,niter,nrows,wi,items,nitems,iter,btime,tooktime,these.rows,these.words )