你这个代码也太长了,实在看不下去
可以怎么改让下面的DESeq2的代码运行出来以后跟DESeq运行的结果一样呢?
##########DESeq
foldChange=2
padj=0.05
setwd("文件路径")
library("DESeq")
rt=read.table("XXX.txt",sep="\t",header=T,check.names=F)
rt=as.matrix(rt)
rownames(rt)=rt[,1]
exp=rt[,2:ncol(rt)]
dimnames=list(rownames(exp),colnames(exp))
data=matrix(as.numeric(as.matrix(exp)),nrow=nrow(exp),dimnames=dimnames)
data=avereps(data)
data=data[rowMeans(data)>1,]
data=round(data,0)
group=c(rep("normal",10),rep("tumor",10))
design = factor(group)
newTab = newCountDataSet( data, design )
newTab = estimateSizeFactors(newTab)
newData=counts(newTab, normalized=TRUE )
#have replicates
newTab = estimateDispersions( newTab, fitType = "local")
diff = nbinomTest( newTab, "normal", "tumor")
diff = diff[is.na(diff$padj)==FALSE,]
diff = diff[order(diff$padj),]
write.table( diff, file="DESeqOut.xls",sep="\t",quote=F,row.names=F)
diffSig = diff[(diff$padj < padj & (diff$log2FoldChang>foldChange | diff$log2FoldChange<(-foldChange))),]
write.table( diffSig, file="diffSig.xls",sep="\t",quote=F,row.names=F)
diffUp = diff[(diff$padj < padj & (diff$log2FoldChang>foldChange)),]
write.table( diffUp, file="up.xls",sep="\t",quote=F,row.names=F)
diffDown = diff[(diff$padj < padj & (diff$log2FoldChange<(-foldChange))),]
write.table( diffDown, file="down.xls",sep="\t",quote=F,row.names=F)
normalizeExp=rbind(id=colnames(newData),newData)
write.table(normalizeExp,file="normalizeExp.txt",sep="\t",quote=F,col.names=F)
diffExp=rbind(id=colnames(newData),newData[diffSig$id,])
write.table(diffExp,file="diffmRNAExp.txt",sep="\t",quote=F,col.names=F)
############
n=50
hmExp=log10(newData[diffSig$id,]+0.001)
n=n/2
m=n+1
nrow=dim(hmExp)[1]
nrow=nrow-n
hmExp=hmExp[-m:-nrow,]
library('gplots')
hmMat=as.matrix(hmExp)
#pdf(file="heatmap.pdf",width=60,height=60)
par(oma=c(10,3,3,7))
heatmap.2(hmMat,col='bluered', trace="none", key=T, keysize=0.8)
dev.off()
#####
allDiff=diff[is.na(diff$padj)==FALSE,]
xMax=max(-log10(allDiff$padj))+1
yMax=10
plot(-log10(allDiff$padj), allDiff$log2FoldChange, xlab="-log10(padj)",ylab="log2FoldChange",
main="Volcano", xlim=c(0,xMax),ylim=c(-yMax,yMax),yaxs="i",pch=20, cex=0.4)
diffSub=allDiff[allDiff$padj<padj & allDiff$log2FoldChange>foldChange,]
points(-log10(diffSub$padj), diffSub$log2FoldChange, pch=20, col="red",cex=0.4)
diffSub=allDiff[allDiff$padj<padj & allDiff$log2FoldChange<(-foldChange),]
points(-log10(diffSub$padj), diffSub$log2FoldChange, pch=20, col="blue",cex=0.4)
abline(h=0,lty=2,lwd=3)
########DESeq2
#######DESeq2
library("DESeq2") #载入DESeq2包
library("limma") #载入limma包
library("pasilla") #载入pasilla包
setwd("文件路径")
ESCC.data=read.table("mRNA_symbol.txt",header=TRUE,sep="\t")
exprSet = as.matrix(ESCC.data[,-1])
rownames(exprSet)<-ESCC.data$id
coldata<-data.frame(c(rep("normal",10),rep("tumor",10)))
"condition"->names(coldata)
rownames(coldata) = colnames(exprSet)
dds<- DESeqDataSetFromMatrix(countData = exprSet,colData = coldata,design = ~condition)
dds2<-DESeq(dds)
##resultsNames(dds2)
##4.DESeq2特有的normalization方法
## 下面的代码如果不感兴趣不需要运行,就是看看normalization前面的数据分布差异
rld<- rlogTransformation(dds2) #得到经过DESeq2软件normlization的表达矩阵
new_exprSet=assay(rld)
tiff(file="normalization.tiff",width =60,height =60,units ="cm",compression="lzw",bg="white",res=600)
par(cex = 0.7)
n.sample=ncol(exprSet)
if(n.sample>40) par(cex = 0.5)
cols <- rainbow(n.sample*1.2)
par(mfrow = c(2,2))
boxplot(exprSet, col = cols,main="expression value",las=2)
boxplot(new_exprSet, col = cols,main="expression value",las=2)
hist(exprSet)
hist(new_exprSet)
dev.off()
normalizeExp=rbind(id=colnames(rld),rld)
write.table(normalizeExp,file="normalizeExp.txt",sep="\t",quote=F,col.names=F) #输出所有基因校正后的表达值(normalizeExp.txt)
##5.差异表达分析
res<- results(dds2,contrast=c("condition","tumor","normal")) #提取差异分析结果,本案例是tumor/normal组进行比较
resOrdered = res[order(res$padj),] #结果以padj,从小到大排序
resdata = merge(as.data.frame(resOrdered),as.data.frame(counts(dds2,normalize=TRUE)),by="row.names",sort=FALSE) #获取全部基因的差异表达分析结果
write.csv(resdata,file="ESCC_T-N.csv",row.names = F) #得到csv格式的全部基因的差异表达分析结果
diff_up = subset(resdata,padj < 0.05 & (log2FoldChange > 1)) #获取上调的差异表达基因(adjustp < 0.05,并且|log2(foldchange)|>1)
write.csv(diff_up,file="diff_up.csv",row.names = F) #得到csv格式的差异表达基因结果
diff_down = subset(resdata,padj < 0.05 & (log2FoldChange < -1)) #获取下调的差异表达基因(adjustp < 0.05,并且|log2(foldchange)|>1)
write.csv(diff_down,file="diff_down.csv",row.names = F) #得到csv格式的差异表达基因结果