# Eklund_script03.txt # 2007-12-06 # Aron Charles Eklund # http://www.cbs.dtu.dk/suppl/biascorr/ # # Fig 6 (consistency of correlation of genes with ER status in breast cancer) # note: this script may require quite a bit of RAM # (it uses >10GB on my machine) # R 2.5.1 library(affy) library(bias) library(squash) ## hgu133a breast cancer data sets w/ ER annotation: ds <- c('hess', 'wang', 'sotiriou', 'gse3494.u133a', 'gse2603') loadIntoList <- function(filenames) { e <- new.env() for (f in filenames) { load(f, envir = e) } rev(as.list(e)) } ds.batch <- paste('~/aron/chb/data/int/', ds, '.batch.RData', sep = '') all.batch <- loadIntoList(ds.batch) names(all.batch) <- ds ## these data sets were saved in an older format; need to update all.batch <- lapply(all.batch, updateObject) all.rma <- lapply(all.batch, rma) ## do bias correction all.bm <- lapply(ds, function(i) { getBiasMetrics(all.batch[[i]], all.rma[[i]]) }) names(all.bm) <- ds all.rma.bc <- lapply(ds, function(i) { biasCorrection(all.rma[[i]], all.bm[[i]]) }) names(all.rma.bc) <- ds ## Estrogen Receptor status all.er <- list( hess = all.rma$hess$erpos, wang = as.numeric(all.rma$wang$ER), sotiriou = all.rma$sotiriou$er, gse3494.u133a = c(1, 0, NA)[as.numeric(all.rma$gse3494.u133a$ER.status)], gse2603 = as.numeric(all.rma$gse2603$Path.ER.status) ) ## correlation of each probe set with ER status all.cor.rma <- sapply(ds, function(i) { cor(t(exprs(all.rma[[i]])), all.er[[i]], use = 'pairwise') }) all.cor.rma.bc <- sapply(ds, function(i) { cor(t(exprs(all.rma.bc[[i]])), all.er[[i]], use = 'pairwise') }) ## correlation of correlations corcor.rma <- cor(all.cor.rma) corcor.rma <- corcor.rma[lower.tri(corcor.rma)] corcor.rma.bc <- cor(all.cor.rma.bc) corcor.rma.bc <- corcor.rma.bc[lower.tri(corcor.rma.bc)] r <- range(corcor.rma, corcor.rma.bc) # 0.5461428 0.8262959 ############# improvement in CC on the gse3494.u133a vs. Sotiriou r2 <- range(all.cor.rma [,'gse3494.u133a'], all.cor.rma [,'sotiriou'], all.cor.rma.bc[,'gse3494.u133a'], all.cor.rma.bc[,'sotiriou'] ) # [1] -0.5466290 0.6683921 h1 <- hist2(all.cor.rma[,'gse3494.u133a'], all.cor.rma[,'sotiriou'], xlim = r2, ylim = r2, plot = FALSE) h2 <- hist2(all.cor.rma.bc[,'gse3494.u133a'], all.cor.rma.bc[,'sotiriou'], xlim = r2, ylim = r2, plot = FALSE) map <- colormap(cbind(h1$z, h2$z)) ## how much does CoC improve? cor(all.cor.rma [,'gse3494.u133a'], all.cor.rma [,'sotiriou']) # 0.5461428 cor(all.cor.rma.bc[,'gse3494.u133a'], all.cor.rma.bc[,'sotiriou']) # 0.7553619 ############## make fig 6 ## want to color the gse3494.u133a/sotiriou pair RED myCol <- rep(1, 10) myCol[8] <- 2 pdf('Fig6.pdf', width = 10, height = 3) layout(matrix(1:4, nrow = 1), width = c(1,1,1,0.3)) par(mar = c(4.1, 4.1, 2, 2)) plot(corcor.rma, corcor.rma.bc, type = 'n', # axes = FALSE, xlim = r, ylim = r, xlab = 'CC before bias correction', ylab = 'CC after bias correction') # axis(1, at = c(0.6, 0.7, 0.8), labels = c('0.6', '0.7', '0.8')) # axis(2, at = c(0.6, 0.7, 0.8), labels = c('0.6', '0.7', '0.8')) box(bty = 'l') abline(0,1, col = 'grey') points(corcor.rma, corcor.rma.bc, col = myCol) mtext('a', side = 3, adj = -0.2, line = 1, cex = par('cex'), font = 2) colorgram(h1, map = map, key = FALSE, xlab = 'correlation w/ ER (Miller data)', ylab = 'correlation w/ ER (Sotiriou data)', ) mtext('b', side = 3, adj = -0.2, line = 1, cex = par('cex'), font = 2) colorgram(h2, map = map, key = FALSE, xlab = 'correlation w/ ER (Miller data)', ylab = 'correlation w/ ER (Sotiriou data)', ) mtext('c', side = 3, adj = -0.2, line = 1, cex = par('cex'), font = 2) par(mar = c(5, 1, 3, 5), yaxs='r', las = 1) drawKey(map, lab='counts') dev.off()