3 关于DESeq2分析RNA-seq差异基因的代码

可以怎么改让下面的DESeq2的代码运行出来以后跟DESeq运行的结果一样呢?

##########DESeq

foldChange=2

padj=0.05

setwd("文件路径")  

library("DESeq")

rt=read.table("XXX.txt",sep="\t",header=T,check.names=F)  

rt=as.matrix(rt)

rownames(rt)=rt[,1]

exp=rt[,2:ncol(rt)]

dimnames=list(rownames(exp),colnames(exp))

data=matrix(as.numeric(as.matrix(exp)),nrow=nrow(exp),dimnames=dimnames)

data=avereps(data)

data=data[rowMeans(data)>1,] 

data=round(data,0)


group=c(rep("normal",10),rep("tumor",10))  

design = factor(group)

newTab = newCountDataSet( data, design )

newTab = estimateSizeFactors(newTab)

newData=counts(newTab, normalized=TRUE ) 

#have replicates

newTab = estimateDispersions( newTab, fitType = "local")

diff = nbinomTest( newTab, "normal", "tumor")

diff = diff[is.na(diff$padj)==FALSE,]

diff = diff[order(diff$padj),]

write.table( diff, file="DESeqOut.xls",sep="\t",quote=F,row.names=F)

diffSig = diff[(diff$padj < padj & (diff$log2FoldChang>foldChange | diff$log2FoldChange<(-foldChange))),]

write.table( diffSig, file="diffSig.xls",sep="\t",quote=F,row.names=F)

diffUp = diff[(diff$padj < padj & (diff$log2FoldChang>foldChange)),]

write.table( diffUp, file="up.xls",sep="\t",quote=F,row.names=F)

diffDown = diff[(diff$padj < padj & (diff$log2FoldChange<(-foldChange))),]

write.table( diffDown, file="down.xls",sep="\t",quote=F,row.names=F)


normalizeExp=rbind(id=colnames(newData),newData)

write.table(normalizeExp,file="normalizeExp.txt",sep="\t",quote=F,col.names=F)   

diffExp=rbind(id=colnames(newData),newData[diffSig$id,])

write.table(diffExp,file="diffmRNAExp.txt",sep="\t",quote=F,col.names=F)         

############

n=50         

hmExp=log10(newData[diffSig$id,]+0.001)

n=n/2

m=n+1

nrow=dim(hmExp)[1]

nrow=nrow-n

hmExp=hmExp[-m:-nrow,]

library('gplots')

hmMat=as.matrix(hmExp)

#pdf(file="heatmap.pdf",width=60,height=60)

par(oma=c(10,3,3,7))

heatmap.2(hmMat,col='bluered', trace="none", key=T, keysize=0.8)

dev.off()


#####

allDiff=diff[is.na(diff$padj)==FALSE,]

xMax=max(-log10(allDiff$padj))+1

yMax=10

plot(-log10(allDiff$padj), allDiff$log2FoldChange, xlab="-log10(padj)",ylab="log2FoldChange",

     main="Volcano", xlim=c(0,xMax),ylim=c(-yMax,yMax),yaxs="i",pch=20, cex=0.4)

diffSub=allDiff[allDiff$padj<padj & allDiff$log2FoldChange>foldChange,]

points(-log10(diffSub$padj), diffSub$log2FoldChange, pch=20, col="red",cex=0.4)

diffSub=allDiff[allDiff$padj<padj & allDiff$log2FoldChange<(-foldChange),]

points(-log10(diffSub$padj), diffSub$log2FoldChange, pch=20, col="blue",cex=0.4)

abline(h=0,lty=2,lwd=3)




########DESeq2

#######DESeq2

library("DESeq2")    #载入DESeq2包

library("limma")     #载入limma包

library("pasilla")   #载入pasilla包

setwd("文件路径") 

ESCC.data=read.table("mRNA_symbol.txt",header=TRUE,sep="\t")  

exprSet = as.matrix(ESCC.data[,-1])                        

rownames(exprSet)<-ESCC.data$id                             

coldata<-data.frame(c(rep("normal",10),rep("tumor",10)))      

"condition"->names(coldata)                            

rownames(coldata) = colnames(exprSet)                

dds<- DESeqDataSetFromMatrix(countData = exprSet,colData = coldata,design = ~condition)


dds2<-DESeq(dds)                                                                   

##resultsNames(dds2)



##4.DESeq2特有的normalization方法

## 下面的代码如果不感兴趣不需要运行,就是看看normalization前面的数据分布差异

rld<- rlogTransformation(dds2)                                                          #得到经过DESeq2软件normlization的表达矩阵

new_exprSet=assay(rld)

tiff(file="normalization.tiff",width =60,height =60,units ="cm",compression="lzw",bg="white",res=600)

par(cex = 0.7)

n.sample=ncol(exprSet)

if(n.sample>40) par(cex = 0.5)

cols <- rainbow(n.sample*1.2)

par(mfrow = c(2,2))

boxplot(exprSet, col = cols,main="expression value",las=2)

boxplot(new_exprSet, col = cols,main="expression value",las=2)

hist(exprSet)

hist(new_exprSet)

dev.off()

normalizeExp=rbind(id=colnames(rld),rld)

write.table(normalizeExp,file="normalizeExp.txt",sep="\t",quote=F,col.names=F)          #输出所有基因校正后的表达值(normalizeExp.txt)



##5.差异表达分析

res<- results(dds2,contrast=c("condition","tumor","normal"))                            #提取差异分析结果,本案例是tumor/normal组进行比较

resOrdered = res[order(res$padj),]                                                      #结果以padj,从小到大排序

resdata = merge(as.data.frame(resOrdered),as.data.frame(counts(dds2,normalize=TRUE)),by="row.names",sort=FALSE)  #获取全部基因的差异表达分析结果

write.csv(resdata,file="ESCC_T-N.csv",row.names = F)                                                       #得到csv格式的全部基因的差异表达分析结果


diff_up = subset(resdata,padj < 0.05 & (log2FoldChange > 1))                                               #获取上调的差异表达基因(adjustp < 0.05,并且|log2(foldchange)|>1)

write.csv(diff_up,file="diff_up.csv",row.names = F)                                                        #得到csv格式的差异表达基因结果


diff_down = subset(resdata,padj < 0.05 & (log2FoldChange < -1))                                            #获取下调的差异表达基因(adjustp < 0.05,并且|log2(foldchange)|>1)

write.csv(diff_down,file="diff_down.csv",row.names = F)                                                    #得到csv格式的差异表达基因结果


请先 登录 后评论

1 个回答

王晓明 - 武大硕士

你这个代码也太长了,实在看不下去

请先 登录 后评论
  • 2 关注
  • 2 收藏,10538 浏览
  • youjuzhe 提出于 2018-02-02 14:59