#
# This R function provides a convenient way to visualize the distribution of
# grouped numerical data.
# Engelmann-Hecker-Plot - EH-Plot
#
# author: Robby Engelmann
# provided by: Michael Hecker
# date: 12-05-2010
# e-mail address: michael.hecker@rocketmail.com
# address 1: Leibniz Institute for Natural Product Research and Infection
#            Biology - Hans-Knoell-Institute,
#            Beutenbergstr. 11a, D-07745 Jena, Germany
# address 2: Steinbeis Transfer Center for Proteome Analysis,
#            Schillingallee 68, D-18057 Rostock, Germany
#
# author's comment:
# Please feel free to use, modify or redistribute this file without any
# restrictions.
#
# ARGUMENTS
# data      : Vector of numerical data. Non-finite values (NA, NaN, Inf) are
#             dropped before plotting.
# groups    : Vector of group names; must have the same length as data.
#             Observations whose group is NA are dropped.
# intervals : The data range is dissected into this number of equally wide
#             intervals. Data points of one group that fall into the same
#             interval are drawn side by side.
# offset    : The x-distance between two data points that were found at the
#             same level.
# median    : To show the median of each group. Default is TRUE.
# box       : To overlay a boxplot. Default is FALSE.
# boxborder : The color of the boxplot if a boxplot is drawn.
# xlab      : Label of the x axis.
# ylab      : Label of the y axis.
# ...       : Further arguments, forwarded unchanged to plot(), to the
#             labelled axis() call and to every points() call.
#
# VALUE
# Invisibly, a data frame with one row per plotted point and the columns
# x (horizontal position), y (data value) and group (group name); NULL if
# nothing was drawn.

# FUNCTION
ehplot <- function(data, groups, intervals = 50, offset = 0.1, median = TRUE,
                   box = FALSE, boxborder = "#777777", xlab = "groups",
                   ylab = "values", ...) {
  if (length(data) != length(groups)) {
    stop("'data' and 'groups' must have the same length", call. = FALSE)
  }
  stopifnot(length(intervals) == 1L, intervals >= 1)

  # Group names are collected before any filtering so that every group keeps
  # its slot on the x axis even if all of its observations get dropped.
  group_names <- names(table(groups))
  group_index <- match(as.character(groups), group_names)

  # Non-finite values would break the computation of the interval borders.
  keep <- is.finite(data) & !is.na(group_index)
  data <- data[keep]
  group_index <- group_index[keep]
  if (length(data) == 0L) {
    stop("no finite data values to plot", call. = FALSE)
  }

  # Points are pulled back towards the centre line once the next displacement
  # would exceed this distance from it.
  max_spread <- 0.4

  # Horizontal displacements of the n (>= 1) points sharing one interval.
  # An odd number of points puts the first one on the centre line and fans
  # the remaining ones out alternately to the left and to the right; an even
  # number starts half an offset away from the centre line.
  bin_offsets <- function(n) {
    if (n == 1L) {
      return(0)
    }
    odd <- n %% 2 == 1
    shifts <- numeric(n)
    first <- if (odd) 2L else 1L
    xchange <- if (odd) offset else offset / 2
    wrap_to <- if (odd) offset / 2 else 0
    for (k in seq_len(n - first + 1L)) {
      shifts[first + k - 1L] <- -xchange
      xchange <- -xchange
      if (k %% 2 == 0) {
        xchange <- xchange + offset
      }
      if (xchange > max_spread) {
        xchange <- wrap_to
      }
    }
    shifts
  }

  plot(data, xlim = c(0.5, length(group_names) + 0.5), xaxt = "n", type = "n",
       xlab = xlab, ylab = ylab, ...)
  if (box) {
    group_factor <- factor(group_names[group_index], levels = group_names)
    boxplot(data ~ group_factor, border = boxborder, add = TRUE)
  }
  axis(1, at = seq_along(group_names), labels = group_names, ...)
  # Redraw the axis line in black in case '...' changed its colour.
  axis(1, at = seq_along(group_names), labels = FALSE, col = "black")

  # If all values are identical there is no range to dissect; every group is
  # then treated as lying in one single interval.
  lo <- min(data)
  hi <- max(data)
  breaks <- if (lo < hi) seq(from = lo, to = hi, length.out = intervals + 1)

  coords <- vector("list", length(group_names))
  for (i in seq_along(group_names)) {
    values <- sort(data[group_index == i])
    if (length(values) == 0L) {
      next
    }
    counts <- if (is.null(breaks)) {
      length(values)
    } else {
      hist(values, breaks = breaks, plot = FALSE)$counts
    }
    # The sorted values line up with the non-empty intervals in order.
    x <- i + unlist(lapply(counts[counts > 0], bin_offsets))

    # Points are drawn one at a time so that vector-valued graphical
    # parameters in '...' are applied to each point the same way.
    for (p in seq_along(values)) {
      points(x[p], values[p], ...)
    }
    if (median) {
      lines(c(i - 0.3, i + 0.3), rep(stats::median(values), 2), lwd = 3)
    }
    coords[[i]] <- data.frame(x = x, y = values, group = group_names[i],
                              stringsAsFactors = FALSE)
  }

  invisible(do.call(rbind, Filter(Negate(is.null), coords)))
}

# EXAMPLES
# Only run in an interactive session: they open new graphics windows.
if (interactive()) {
  data(iris)
  x11(); ehplot(iris$Sepal.Length, iris$Species, intervals = 20, cex = 1.8, pch = 20)
  x11(); ehplot(iris$Sepal.Width, iris$Species, intervals = 20, box = TRUE, median = FALSE)
  x11(); ehplot(iris$Petal.Length, iris$Species, pch = 17, col = "red")
}