## Hugo Quené, R course at UiL OTS, June 2012
## h.quene@uu.nl
## MEREL KEIJZER and RIAS VAN DEN DOEL

# Load the edited data set from an .rda file; the object name is stored
# inside the file (it loads as `keijzerdoel`).
# Cleanup was already done to remove email addresses.
load(file = url("http://www.hugoquene.nl/R/keijzerdoel.rda"))

# Null model: intercept-only logistic GLMM of the binary response
# (Nslike == "1"), with crossed random intercepts for subjects and stimuli.
# NOTE: lmer(..., family = "binomial") was deprecated and then removed in
# lme4 >= 1.0 -- generalized linear mixed models must be fitted with glmer().
kd01.lmer <- glmer((Nslike == "1") ~ 1 + (1 | Subj) + (1 | Stim),
                   data = keijzerdoel, family = "binomial")
kd01.lmer
# Random effects:
#  Groups Name        Variance Std.Dev.
#  Subj   (Intercept) 0.58236  0.76313
#  Stim   (Intercept) 5.59637  2.36567
# Number of obs: 3390, groups: Subj, 169; Stim, 20
#
# Notice that the between-item variance is very large!
# Can we explain the between-item variability?
# Most predictors seem to capture subjects' properties, not item properties.
#
# Fixed effects:
#             Estimate Std. Error z value Pr(>|z|)
# (Intercept)   0.8434     0.5367   1.572    0.116

# Include trial number (centered around 10) as predictor.
kd02.lmer <- update(kd01.lmer, . ~ . + I(Trial - 10))
# Not only in the fixed part but also in the random part, cf. Barr 2012:
# replace the plain random intercepts by intercept + Trial slopes.
kd02.lmer <- update(kd01.lmer,
                    . ~ . + I(Trial - 10) -
                      (1 | Subj) + (I(Trial - 10) | Subj) -
                      (1 | Stim) + (I(Trial - 10) | Stim))
kd02.lmer
# Random effects:
#  Groups Name          Variance    Std.Dev.  Corr
#  Subj   (Intercept)   0.577699235 0.7600653
#         I(Trial - 10) 0.000959356 0.0309735  0.876
#  Stim   (Intercept)   5.747126123 2.3973164
#         I(Trial - 10) 0.000094006 0.0096957 -1.000
# Number of obs: 3390, groups: Subj, 169; Stim, 20
#
# Fixed effects:
#                Estimate Std. Error z value Pr(>|z|)
# (Intercept)    0.861259   0.543771   1.584    0.113
# I(Trial - 10) -0.007166   0.009323  -0.769    0.442
#
# Trial is not significant.
# Average slope of Trial is -0.007.
# Slope of Trial varies between subjects.
# Subjects with higher intercept tend to have higher slope as well.

# Let's use the confidence ratings!
# Conf(idence) rating: 1 = certain ... 5 = uncertain.
# Weight each observation by 1/Conf, so that confident responses count
# more heavily in the fit.
kd03.lmer <- glmer((Nslike == "1") ~ 1 + (1 | Subj) + (1 | Stim),
                   data = keijzerdoel, weights = 1 / Conf,
                   family = "binomial")
kd03.lmer

# Add the Native predictor, at subjects level (same 1/Conf weights!).
kd04.lmer <- glmer((Nslike == "1") ~ 1 + (1 | Subj) + (1 | Stim) + Native,
                   data = keijzerdoel, weights = 1 / Conf,
                   family = "binomial")
kd04.lmer
anova(kd04.lmer, kd03.lmer)

### The remarks below may be obsolete --
# Notice that tau^2 (variance between Subj) is even LARGER in kd04 than
# in kd03. This suggests that the larger model does not perform better in
# predicting responses, i.e. R^2_2 is almost zero.
# Inspect "R2 change" (proportional reduction of prediction error) at the
# subjects level; see handout added to yesterday's session.

# Harmonic mean: 1 / mean(1/x). Used as the "typical" number of
# observations per subject (n) in the R^2 computation below.
# NOTE: use TRUE, not the reassignable alias T, for the default.
harmonic.mean <- function(x, na.rm = TRUE) {
  1 / mean(1 / x, na.rm = na.rm)
}
harmonic.mean(table(keijzerdoel$Subj))  # use n = 20

### The analyses below may be obsolete --
# (0.010347 + 0.087866/20)  # R^2 of model kd03
# [1] 0.0147403
# (0.010443 + 0.087866/20)  # R^2 of model kd04
# [1] 0.0148363
# 1-(.0148/.0147)           # proportional reduction
# [1] -0.006802721
# The negative result indicates that model kd04 performs WORSE (not
# better) than kd03.

# Inspect random coefficients of Subj.
# Auxiliary var: each subject's (unique) Native value.
native.bysubj <- tapply(keijzerdoel$Native, keijzerdoel$Subj, unique)
table(native.bysubj)
# native.bysubj
#   1   2
# 163   6
# Notice that there are only a few nonnatives.

# Plot random estimates (BLUPs) of Subj, and highlight nonnatives
# (red filled circles).
plot(ranef(kd03.lmer)$Subj[[1]],
     col = ifelse(native.bysubj == 1, 1, 2),
     pch = ifelse(native.bysubj == 1, 1, 16))
# Notice an outlier.