# function for exploratory data analysis # Hugo Quené, h.quene@uu.nl, 25 Jun 2012 # 2013.01.2013 modified for EMLAR9 # added addConst argument, and check arguments, to add constant # added dnorm call to create normal density plot # 2014.12.04 revised function eda.fnc # added datax=T so that histogram and qqplot have similar x axis # changed method for determining Q1, Q2, Q3 values, now using fivenum # added Z scores to qqnorm plot # added sep= argument in calls of paste # removed rugSide argument # you may provide many arguments, preferably with default values eda.fnc <- function( xx, # exploratory data analysis of variable `xx` densCol="steelblue", # + color for rug and density line # rugSide=1, # + side where to place rug: 1B 2L 3T 4R addConst=FALSE ) # add constant to prevent neg values? { xxname <- deparse(substitute(xx)) withboxcox <- require(MASS) # returns TRUE if success minxx <- min(xx,na.rm=T) if (withboxcox) { if (minxx<=0) { if (addConst) { xx <- xx+abs(minxx)+1 warning( paste("constant",abs(minxx)+1,"added to",xxname) ) } else { withboxcox <- FALSE warning( "no Box-Cox profile" ) } } } ifelse( withboxcox, # if withboxcox ops <- par(mfrow=c(1,3)), # then 3 panels ops <- par(mfrow=c(1,2)) ) # else 2 panels qqq <- fivenum(xx)[2:4] # Q1, Q2, Q3 # first panel or frame hist(xx, prob=T, col="wheat", # histogram xlab=xxname, main=paste("Histogram of\n",xxname,sep="") ) curve( dnorm(x,mean=mean(xx),sd=sd(xx)), add=TRUE, col="red", lwd=2) rug(xx,col=densCol,side=1) # rug at bottom lines(density(xx),col=densCol,lty=2,lwd=2) # add line to graph abline( v=qqq, lty=2, col="grey" ) mtext( c("Q1","Q2","Q3"), at=qqq, side=3, line=0, cex=0.7, col="grey") # second panel or frame qqnorm(xx,pch=16,cex=0.7, # QQ normal plot col=densCol, main=paste("Normal QQ Plot of\n",xxname,sep=""), ylab=xxname, datax=TRUE ) # notice ylab with datax=T qqline(xx,col="red",lwd=2, datax=TRUE ) # with normal line # grid lines for qqline, through Q1 and Q3 abline( h=qnorm(c(.25,.50,.75)), lty=2, col="grey" ) abline( v=qqq, lty=2, col="grey" ) mtext( c("Q1","Q2","Q3"), at=qqq, side=3, line=0, cex=0.7, col="grey") mtext( text=round(qnorm(c(.25,.50,.75)),2), at=qnorm(c(.25,.50,.75)), side=4, line=0, cex=0.7, col="grey") if (withboxcox) { # third panel or frame boxcox(xx~1 ) # box-cox likelihood of lambda title( main=paste("LL of power transformation of\n",xxname,sep="") ) } par(ops) # restore old parameter settings rm(xxname) # cleanup }