Take your time and read this introduction carefully.
Download the file: data.combined.tab (to your Desktop), take a look at it and open R.
require(affy) Intensity <- read.table(file="Desktop/data.combined.tab") Int.m <- as.matrix(Intensity[,2:23]) Norm.Int.m <- signif(normalize.qspline(Int.m,na.rm=TRUE),digits = 6)
par(mfrow=c(2,2)) # to get a window with room for 4 plots plot(Int.m[,1],Int.m[,5],main="wild type - raw",xlab="no alcohol",ylab="alcohol",log="xy") plot(Norm.Int.m[,1],Norm.Int.m[,5],main="wild type - normalized",xlab="no alcohol",ylab="alcohol",log="xy")... and one of the mutant arrays:
plot(Int.m[,18],Int.m[,11],main="mutant - raw",xlab="no alcohol",ylab="alcohol",log="xy") plot(Norm.Int.m[,18],Norm.Int.m[,11],main="mutant - normalized",xlab="no alcohol",ylab="alcohol",log="xy")
get.pval.ttest <- function(dataf,index1,index2,
datafilter=as.numeric){
f <- function(i) {
return(t.test(datafilter(dataf[i,index1]),
datafilter(dataf[i,index2]))$p.value)}
return(sapply(1:length(dataf[,1]),f))}
pVal.TF <- signif(get.pval.ttest(Norm.Int.m,9:15,16:22), digits = 3) pVal.wt <- signif(get.pval.ttest(Norm.Int.m,5:8,1:4), digits = 3)Order the data in accordance to the P-values from the wildtype experiment:
orders <- order(pVal.wt) ordered.data <- cbind(as.vector(Intensity[,1])[orders],pVal.wt[orders],pVal.TF[orders])Write your results into a file:
write(t(as.matrix(ordered.data)),ncolumns=3,file = "Desktop/t-test.wt.tab")
orders <- order(pVal.TF) ordered.data <- cbind(as.vector(Intensity[,1])[orders],pVal.wt[orders],pVal.TF[orders]) write(t(as.matrix(ordered.data)),ncolumns=3,file = "Desktop/t-test.TF.tab")
ann <- read.delim(file="Desktop/annotation.tab",sep="\t")
fold.m <- Norm.Int.m[,9:15]/Norm.Int.m[,16:22]
fold.mean.m <- signif(apply(fold.m,1,function(x){mean(x,na.rm=TRUE)}), digits=3)
log.fold.mean.m <- signif(log2(fold.mean.m), digits=3)
ordered.data <- cbind(as.vector(Intensity[,1])[orders],
pVal.TF[orders],fold.mean.m[orders],log.fold.mean.m[orders],as.vector(ann[,2])[orders])
write(t(as.matrix(ordered.data)),ncolumns=5,file = "Desktop/t-test.fold.TF.tab")
random <- sample(c(9:22))
permuted.m <- Norm.Int.m[,c(1:8,random)]
pVal.permuted.TF <- get.pval.ttest(permuted.m,9:15,16:22)
log.fold.permuted.mean.m <- log2(apply(permuted.m[,9:15]/permuted.m[,16:22],1,function(x){mean(x,na.rm=TRUE)}))
plot(log.fold.mean.m,pVal.TF,main="Volcano Plot", log="y",xlab="M (log2 fold change)",ylab="P-value", pch="+", col="blue")
points(log.fold.permuted.mean.m,pVal.permuted.TF, type="p", pch="+", col="red")
anova.2F<-function(vector){
anova.df$x<-vector
return(anova(aov(x ~ F1*F2, anova.df))$Pr[1:3])}
TF.f<-factor(c(0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1))
alcohol.f<-factor(c(0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0))
anova.df<-data.frame(F1=TF.f, F2=alcohol.f, x="Na")
BA.2f.pval<-apply(Norm.Int.m, 1, anova.2F) # this command may take a few minutes
pVal1 <- signif(BA.2f.pval[1,], digits = 3)
pVal2 <- signif(BA.2f.pval[2,], digits = 3)
pVal3 <- signif(BA.2f.pval[3,], digits = 3)
orders <- order(pVal2)
ordered.data <- cbind(as.vector(Intensity[,1])[orders],
pVal2[orders],pVal1[orders],pVal3[orders])
write(t(as.matrix(ordered.data)),ncolumns=4,file = "Desktop/ANOVA.tab")
The output is sorted on the first of the three P-values (the third last column of the output file). The top of the list is therefore showing the genes that are significantly affected by the presence of alcohol. The second last column is the P-value, that shows which genes are affected by the transcription-factor mutation. The last column show the P-values from the test where the two conditions are combined, that is which genes that are affected by the presence of the mutation and the alcohol at the same time, not just one of them (a synergistic effect).
alcohol.changed.f<-factor(c(0,0,0,0,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0)) # make changes! anova.df<-data.frame(F1=TF.f, F2=alcohol.changed.f, x="Na") BA.2f.changed.pval<-apply(Norm.Int.m, 1, anova.2F) # this command may take a few minutes pVal2.changed <- BA.2f.changed.pval[2,]Plot the "real" P-values against the permuted P-values:
plot(pVal2, pVal2.changed,xlab="P-value",ylab="permuted P-value",log="xy",ylim=range(pVal2)) abline(0,1,col="red") # makes a red line through the diagonal of the plot
plot(pVal2, pVal.TF,xlab="P-value from ANOVA",ylab="P-value from t-test",log="xy",ylim=range(pVal2)) abline(0,1,col="red")
Make as before a smaller file, that contains the gene name, P-value (with and without alcohol), fold change and log2-fold change.
fold.m <- Norm.Int.m[,5:8]/Norm.Int.m[,1:4]
fold.mean.m <- signif(apply(fold.m,1,function(x){mean(x,na.rm=TRUE)}), digits=3)
log.fold.mean.m <- signif(log2(fold.mean.m), digits=3)
orders <- order(pVal2)
ordered.data <- cbind(as.vector(Intensity[,1])[orders],
pVal2[orders],fold.mean.m[orders],log.fold.mean.m[orders],as.vector(ann[,2])[orders])
write(t(as.matrix(ordered.data)),
ncolumns=5,
file = "Desktop/ANOVA.fold.tab")
Compare this to the output from the t-tests, where the effect of alcohol on both the wild type and the transcription-factor mutant were tested.