#Calculate MAID-scores by fitting an exponential function to the MA-plot characteristics. #Sometimes different values for nlrobstart have to be specified when the regression does not converge. #The function should work fine when the MA-plot has a tadpole-shape (the normal case when similar samples are compared). #The function might run into problems when the MA-plot shows an eye-pattern (when very different samples are compared). #In the latter case one might question the study design and possibly choose a different filter criterion. maid <- function(M, A, thresh=2, visualize=T, nlrobstart=c(a=20, b=0.5, c=0.5)) { window = round(length(A)/100) Asort = A[order(A)] Msort = M[order(A)] y = {}; x = {} for (i in seq(1, (length(A)-window+1), window/10)) { y = append(y, as.numeric(quantile(Msort[i:(i+window-1)],3/4,type=5) - quantile(Msort[i:(i+window-1)],1/4,type=5))) x = append(x, mean(Asort[i:(i+window-1)])) } sp = smooth.spline(x, y) xs = c(0:199)/199*(max(x)-min(x))+min(x) ys = unlist(predict(sp, data.frame(x=xs))$y) xt = xs[41:200]; yt = ys[41:200] cf = nlrob(yt ~ a*exp(-b*xt)+c, data=data.frame(xt,yt), start=nlrobstart, maxit=1000) corrfactor = predict(cf, data.frame(xt=A)) maidscores = M/corrfactor if (visualize) { xx = c(0:199)/199*(max(A)+2-min(A))+min(A) f = predict(cf, data.frame(xt=xx)) plot(A, M, col="blue", cex.axis=1.4, cex.lab=1.4, cex=0.5, pch=20, ylim=max(abs(range(M)))*c(-1,1)+c(-0.25,0.25)) lines(xx, f, col="red", lwd=2, lty=2) lines(xx, thresh*f, col="red", lwd=3) lines(xx, -thresh*f, col="red", lwd=3) abline(0,0) points(x, y, cex=1, pch=20, col="grey") points(xt, yt, cex=1, pch=20) } return(maidscores) } #Filter genes based on MAID-scores and a paired two-sample t-test. filtergenes <- function(input1, input2, thresh=2, pvalthresh=0.05, visualize=T, print="", nlrobstart=c(a=20, b=0.5, c=0.5)) { ms1 = apply(input1,1,mean) ms2 = apply(input2,1,mean) M = log2(ms2/ms1) A = 1/2*log2(ms2*ms1) if (print!="") { png(filename=print, height=800, width=1200, bg="white") visualize=T } maidscores = maid(M, A, thresh, visualize, nlrobstart) rawp = c() for (i in 1:nrow(input1)) rawp[i] = t.test(input1[i,], input2[i,], paired=T)$p.value names(rawp) = names(M) filteredsubset = rawp<=pvalthresh & abs(maidscores)>=thresh if (visualize) { points(A[rawp<=pvalthresh], M[rawp<=pvalthresh], col="orange", cex=0.7, pch=20) points(A[filteredsubset], M[filteredsubset], col="green", cex=1.4, pch=20) } if (print!="") dev.off() returnthis = list(filtered=rownames(input1)[filteredsubset], M=M, A=A, maidscores=maidscores, pvalues=rawp, maidscores.thresh=thresh, pvalues.thresh=pvalthresh) return(returnthis) } #Create directories for images and data tables. make_directories <- function() { dir.create("Images", showWarnings=F) dir.create("Images/ChipImagesWeights", showWarnings=F) dir.create("Images/ChipImagesResiduals", showWarnings=F) dir.create("Images/MAPlotsNormalized", showWarnings=F) dir.create("Images/MAPlotsRaw", showWarnings=F) dir.create("Images/TimeCourses", showWarnings=F) dir.create("Tables", showWarnings=F) } #Plot all Affymetrix chips (pseudo-images, weights and residuals). make_pseudoimages <- function() { for (i in 1:length(files)) { chipname = substr(files[i], 1, nchar(files[i])-4) png(filename=paste("Images/ChipImagesWeights/WeightImage_",chipname,".png",sep=""), height=1000, width=950, bg="white") image(data[,i], cex.main=1.4) dev.off() png(filename=paste("Images/ChipImagesResiduals/ResidualImage_",chipname,".png",sep=""), height=1000, width=950, bg="white") par(cex.main=1.4) image(pfit, which=i, type="resids") dev.off() } } #Plot boxplots of (normalized) signal intensities. make_boxplots <- function() { png(filename="Images/Boxplot_signals.png", height=800, width=800, bg="white") boxplot(as.data.frame(signals), main="Boxplot signals", las=2, log="y", cex.main=1.4, cex.axis=1.2, cex.lab=1.2) dev.off() png(filename="Images/Boxplot_signals_normalized.png", height=800, width=800, bg="white") boxplot(as.data.frame(signals.normalized), main="Boxplot normalized signals", las=2, log="y", cex.main=1.4, cex.axis=1.2, cex.lab=1.2) dev.off() } #Plot densities of (normalized) signal intensities. make_densityplots <- function() { png(filename="Images/Densities_signals.png", height=700, width=700, bg="white") plot(density(log2(signals[,1])), main="Densities signals", ylab="", cex.main=1.4, cex.axis=1.2, cex.lab=1.2, xlab="log2(signal)", xlim=c(-3,17), ylim=c(0,0.2)) for (i in 2:length(files)) { lines(density(log2(signals[,i]))) } dev.off() png(filename="Images/Densities_signals_normalized.png", height=700, width=700, bg="white") plot(density(log2(signals.normalized[,1])), main="Densities normalized signals", ylab="", cex.main=1.4, cex.axis=1.2, cex.lab=1.2, xlab="log2(signal)", xlim=c(-3,17), ylim=c(0,0.2)) for (i in 2:length(files)) { lines(density(log2(signals.normalized[,i]))) } dev.off() } #Plot MA-plots including loess regression curve (raw signal intensities). make_maplots <- function() { for (j in 1:(ncol(signals)-1)) { for (k in (j+1):ncol(signals)) { M = log2(signals[,j]/signals[,k]) A = 1/2*log2(signals[,j]*signals[,k]) png(filename=paste("Images/MAPlotsRaw/MAplot_",colnames(signals)[j],"_vs_",colnames(signals)[k],".png",sep=""), height=750, width=900, bg="white") ma.plot(A, M, pch=21, ylim=max(abs(range(M)))*c(-1,1), bg="blue", cex.main=1.4, cex.axis=1.2, cex.lab=1.2, cex=0.5, show.statistics=F, family.loess="symmetric", subset=sample(1:length(M)), span=0.05, lwd=3, lty=1, main=paste(colnames(signals)[j],"vs",colnames(signals)[k])) dev.off() } } } #Plot MA-plots including loess regression curve (normalized signal intensities). make_maplots_normalized <- function() { for (j in 1:(ncol(signals.normalized)-1)) { for (k in (j+1):ncol(signals.normalized)) { M = log2(signals.normalized[,j]/signals.normalized[,k]) A = 1/2*log2(signals.normalized[,j]*signals.normalized[,k]) png(filename=paste("Images/MAPlotsNormalized/MAplot_",colnames(signals.normalized)[j],"_vs_",colnames(signals.normalized)[k],".png",sep=""), height=750, width=900, bg="white") ma.plot(A, M, pch=21, ylim=max(abs(range(M)))*c(-1,1), bg="blue", cex.main=1.4, cex.axis=1.2, cex.lab=1.2, cex=0.5, show.statistics=F, family.loess="symmetric", subset=sample(1:length(M)), span=0.05, lwd=3, lty=1, main=paste(colnames(signals.normalized)[j],"vs",colnames(signals.normalized)[k])) dev.off() } } } #Plot time courses of all filtered genes. make_timecourseplots <- function() { for (j in 1:nrow(signals.normalized.filtered)) { temp = signals.normalized.filtered[j,] png(filename=paste("Images/TimeCourses/",rownames(signals.normalized.filtered)[j],"_",genecards[rownames(signals.normalized.filtered),2][j],"_timecourse.png",sep=""), height=700, width=1000, bg="white") par(mfrow=c(1,2)) plot(range(append(temp,0)), xaxt="n", type="n", xlab="", ylab="signal intensity", main=paste("absolute -",genecards[rownames(signals.normalized.filtered),2][j]), cex.main=1.4, cex.axis=1.2, cex.lab=1.2) axis(1, c(1,2), c("t0","t1"), cex.axis=1.2) for (i in patientids) lines(c(temp[paste(i,"t0",sep="_")], temp[paste(i,"t1",sep="_")]), col=(1:length(patientids))[patientids==i], lwd=3) plot(range(append(log2(temp[paste(patientids,"t1",sep="_")]/temp[paste(patientids,"t0",sep="_")]),0)), xaxt="n", type="n", xlab="", ylab="log2 fold-change", main=paste(rownames(signals.normalized.filtered)[j],"- relative"), cex.main=1.4, cex.axis=1.2, cex.lab=1.2) axis(1, c(1,2), c("t0","t1"), cex.axis=1.2) for (i in patientids) lines(c(0, log2(temp[paste(i,"t1",sep="_")]/temp[paste(i,"t0",sep="_")])), col=(1:length(patientids))[patientids==i], lwd=3) dev.off() } } #Write some information on the filtered genes to a file (tab-separated format). write_filteredgenes_information <- function() { i = rownames(signals.normalized.filtered) gene_infos = genecards[i,c(1,2,7,9,8)] gene_infos = cbind(gene_infos, round(apply(signals.normalized.filtered[,paste(patientids,"t0",sep="_")],1,mean))) gene_infos = cbind(gene_infos, round(apply(signals.normalized.filtered[,paste(patientids,"t1",sep="_")],1,mean))) gene_infos = cbind(gene_infos, maid.t0t1$pvalues[i]) gene_infos = cbind(gene_infos, maid.t0t1$maidscores[i]) temp = rep("",length(i)) temp[maid.t0t1$maidscores[i]<0] = "Down" temp[maid.t0t1$maidscores[i]>0] = "Up" gene_infos = cbind(gene_infos, temp) colnames(gene_infos) = c("GeneCards ID", "Official Symbol", "Entrez Gene ID", "Official Full Name", "Gene Category", "Mean t0", "Mean t1", "T-Test P t1 vs t0", "MAID-Score t1 vs t0", "Regulation") write.table(file="Tables/filteredgenes_information.txt", gene_infos, quote=F, row.names=F, sep="\t") }