## Hugo Quené, R course at UiL OTS, June 2012
## h.quene@uu.nl
## MEREL KEIJZER and RIAS VAN DEN DOEL

# Load the edited data set from an .rda file; the object name is stored
# inside the file (it loads as `keijzerdoel`).
# Cleanup was already done to remove email addresses.
load(file = url("http://www.hugoquene.nl/R/keijzerdoel.rda"))

# Null model: intercept-only logistic GLMM of the binary response
# (Nslike == "1"), with crossed random intercepts for subjects and stimuli.
# NOTE: lmer(..., family = "binomial") was deprecated and then removed in
# lme4 >= 1.0 -- generalized linear mixed models must be fitted with glmer().
kd01.lmer <- glmer((Nslike == "1") ~ 1 + (1 | Subj) + (1 | Stim),
                   data = keijzerdoel, family = "binomial")
kd01.lmer
# Random effects:
#  Groups Name        Variance Std.Dev.
#  Subj   (Intercept) 0.58236  0.76313
#  Stim   (Intercept) 5.59637  2.36567
# Number of obs: 3390, groups: Subj, 169; Stim, 20
#
# Notice that the between-item variance is very large!
# Can we explain the between-item variability?
# Most predictors seem to capture subjects' properties, not item properties.
#
# Fixed effects:
#             Estimate Std. Error z value Pr(>|z|)
# (Intercept)   0.8434     0.5367   1.572    0.116

# Include trial number (centered around 10) as predictor.
kd02.lmer <- update(kd01.lmer, . ~ . + I(Trial - 10))
# Not only in the fixed part but also in the random part, cf. Barr 2012:
# replace the plain random intercepts by intercept + Trial slopes.
kd02.lmer <- update(kd01.lmer,
                    . ~ . + I(Trial - 10) -
                      (1 | Subj) + (I(Trial - 10) | Subj) -
                      (1 | Stim) + (I(Trial - 10) | Stim))
kd02.lmer
# Random effects:
#  Groups Name          Variance    Std.Dev.  Corr
#  Subj   (Intercept)   0.577699235 0.7600653
#         I(Trial - 10) 0.000959356 0.0309735  0.876
#  Stim   (Intercept)   5.747126123 2.3973164
#         I(Trial - 10) 0.000094006 0.0096957 -1.000
# Number of obs: 3390, groups: Subj, 169; Stim, 20
#
# Fixed effects:
#                Estimate Std. Error z value Pr(>|z|)
# (Intercept)    0.861259   0.543771   1.584    0.113
# I(Trial - 10) -0.007166   0.009323  -0.769    0.442
#
# Trial is not significant.
# Average slope of Trial is -0.007.
# Slope of Trial varies between subjects.
# Subjects with higher intercept tend to have higher slope as well.

# Let's use the confidence ratings!
# Conf(idence) rating: 1 = certain ... 5 = uncertain.
# Weight each observation by 1/Conf, so that confident responses count
# more heavily in the fit.
kd03.lmer <- glmer((Nslike == "1") ~ 1 + (1 | Subj) + (1 | Stim),
                   data = keijzerdoel, weights = 1 / Conf,
                   family = "binomial")
kd03.lmer

# Add the Native predictor, at subjects level (same 1/Conf weights!).
kd04.lmer <- glmer((Nslike == "1") ~ 1 + (1 | Subj) + (1 | Stim) + Native,
                   data = keijzerdoel, weights = 1 / Conf,
                   family = "binomial")
kd04.lmer
anova(kd04.lmer, kd03.lmer)

### The remarks below may be obsolete --
# Notice that tau^2 (variance between Subj) is even LARGER in kd04 than
# in kd03. This suggests that the larger model does not perform better in
# predicting responses, i.e. R^2_2 is almost zero.
# Inspect "R2 change" (proportional reduction of prediction error) at the
# subjects level; see handout added to yesterday's session.

# Harmonic mean: 1 / mean(1/x). Used as the "typical" number of
# observations per subject (n) in the R^2 computation below.
# NOTE: use TRUE, not the reassignable alias T, for the default.
harmonic.mean <- function(x, na.rm = TRUE) {
  1 / mean(1 / x, na.rm = na.rm)
}
harmonic.mean(table(keijzerdoel$Subj))  # use n = 20

### The analyses below may be obsolete --
# (0.010347 + 0.087866/20)  # R^2 of model kd03
# [1] 0.0147403
# (0.010443 + 0.087866/20)  # R^2 of model kd04
# [1] 0.0148363
# 1-(.0148/.0147)           # proportional reduction
# [1] -0.006802721
# The negative result indicates that model kd04 performs WORSE (not
# better) than kd03.

# Inspect random coefficients of Subj.
# Auxiliary var: each subject's (unique) Native value.
native.bysubj <- tapply(keijzerdoel$Native, keijzerdoel$Subj, unique)
table(native.bysubj)
# native.bysubj
#   1   2
# 163   6
# Notice that there are only a few nonnatives.

# Plot random estimates (BLUPs) of Subj, and highlight nonnatives
# (red filled circles).
plot(ranef(kd03.lmer)$Subj[[1]],
     col = ifelse(native.bysubj == 1, 1, 2),
     pch = ifelse(native.bysubj == 1, 1, 16))
# Notice an outlier.