# Get empirical length distribution of aCGH data (from DNAcopy segmentation of bc data) load("length.distr.RData") # Generate list structure for storing simulated data simulated.data <- list() # Function for generation of simulated aCGH data generate.data <- function(le) { data<-numeric() i<-0 while(ile) { l <- le-i} data<-rbind(data,c(state,l,1)) i<-i+l } return(data) } # Generate simulated data with length 500 for (i in 1:100){ class.matrix <- NULL class.matrix[[1]] <- generate.data(500) class.matrix[[2]] <- generate.data(500) # if 2 true.predictions <- unlist(apply(class.matrix[[1]],1,function(x){rep(x[1],x[2])}))!=unlist(apply(class.matrix[[2]],1,function(x){rep(x[1],x[2])})) # Class vector classes <- sample(c(0,1),20,replace=T) # Class specific vectors of probability of each aberant segment class.matrix[[1]][which(class.matrix[[1]][,1]!=2),3] <- 0.7 class.matrix[[2]][which(class.matrix[[2]][,1]!=2),3] <- 0.7 # Data matrix datamatrix <- matrix(NA,nrow=500,ncol=20) samples<-list() # Add individual noise and outliers to each column (sample) for ( t in 1:20){ # Class of current sample class <- classes[t] # Template for current class data <- class.matrix[[class+1]] # Make heterogeneous data with probabilities of aberant segments x[3] from template data[,1] <- apply(data,1,function(x){ sample(c(2,x[1]),1,prob=c(1-x[3],x[3])) }) data <- unlist(apply(data,1,function(x){rep(x[1],x[2])})) # Get proportion of normal cells (copy number 2, log2 ratio = 0) p <- runif(1, min=0.3, max=0.7) # Get log2 ratios given a copy number change in the 1-p proportion of tumor cells datamatrix[,t]<-log2((data*p+2*(1-p))/2) sdev <- runif(1,0.1,0.2) # Add noise datamatrix[,t]<-datamatrix[,t]+rnorm(length(datamatrix[,t]),mean=0,sd=sdev) # Save parameter values samples[[t]] <- data.frame(p=p,log2.ratios=data) } simulated.data[[i]] <- list(class.matrix=class.matrix,true.predictions=true.predictions,classes=classes,datamatrix=datamatrix,samples=samples) } names(simulated.data) <- paste("dataset",1:100,sep="") save(simulated.data,file="simulated.data.RData")