# Multithreaded computing ----
library(parallel)

# Number of cores reported for this machine (may be NA on some platforms).
cl.cores <- detectCores()
cl.cores

# Start a worker cluster of at most 10 processes, never more than the number
# of detected cores. If the core count is unknown (NA), fall back to 10.
n_workers <- max(1L, min(10L, cl.cores, na.rm = TRUE))
cl <- makeCluster(n_workers)
# NOTE(review): nothing in the visible part of this script uses `cl`.
# Call parallel::stopCluster(cl) once the parallel work is finished so the
# worker processes do not linger.
# Differential analysis - mRNA ####
library(tidyverse)
library("limma")
library("edgeR")

# Load the data. The .Rdata file is expected to provide `expermRNA`, the
# expression table used throughout the rest of the script.
load("diffmRNA//mRNA.Rdata")
# load() succeeds silently even if the object is missing, so fail early here.
stopifnot(exists("expermRNA"))
# Quick look at the top-left corner of the table.
expermRNA[1:3, 1:3]
# Split samples into tumor and control groups based on the column names.
# Characters 14-15 of each column name equal to "01" mark a tumor sample;
# everything else is treated as control.
# NOTE(review): presumably TCGA barcodes (sample-type code) - confirm.
sample_code <- str_sub(colnames(expermRNA), 14, 15)
group <- ifelse(sample_code == "01", "tumor", "control")
table(group)

# Convert the character labels to a factor (levels sort alphabetically:
# "control", "tumor").
group_list <- factor(group)
# The two-group comparison further down needs both groups to be present.
stopifnot(nlevels(group_list) == 2L)
# Differential analysis: design matrix and low-expression filter ----

# Build the group design matrix. `~ 0 +` removes the intercept, so the matrix
# has one indicator column per group level instead of an intercept plus a
# contrast column.
# NOTE(review): `design` is not passed to any call in the visible script.
design <- model.matrix(~ 0 + group_list)
rownames(design) <- colnames(expermRNA)
colnames(design) <- levels(group_list)

# Filter out lowly expressed genes (row mean <= 1). drop = FALSE keeps the
# two-dimensional shape even if only a single row or column remains.
expermRNA <- expermRNA[rowMeans(expermRNA) > 1, , drop = FALSE]
# edgeR ----

# Build the DGEList object from the filtered counts and the sample groups.
y <- DGEList(counts = expermRNA, group = group_list)
# Normalisation factors (the original author's note: TMM normalisation).
y <- calcNormFactors(y)
# Estimate dispersions: common first, then per-gene (tagwise).
y <- estimateCommonDisp(y)
y <- estimateTagwiseDisp(y)

# Exact test between the two group levels, in factor-level order.
et <- exactTest(y, pair = levels(group_list))

# Classify genes using the thresholds p.value = 0.01 and lfc = 2.
gene1 <- decideTestsDGE(et, p.value = 0.01, lfc = 2)
# Summary of the differential-analysis classification.
summary(gene1)
# Extract the complete differential-analysis results ----
topTags(et)

# n = Inf returns every tested gene; a fixed cap (previously 100000) would
# silently truncate larger tables.
ordered_tags <- topTags(et, n = Inf)
allDEG <- ordered_tags$table
# Keep only rows with a defined FDR.
allDEG <- allDEG[!is.na(allDEG$FDR), ]

# Save the full result table. Not every gene here is used downstream; the
# significant subset is selected in a later step.
write.table(allDEG, 'diffmRNA//allDEG.txt', sep = '\t')
# Extract the filtered (significant) results ----
# Row names are moved into a column so they survive the dplyr verbs, then
# restored afterwards. The whole chain must be one pipeline: without the pipe
# after rownames_to_column() the filter/arrange step is never applied to
# `diff_signif`.
# NOTE(review): this filters on the raw PValue, whereas the volcano plot and
# decideTestsDGE() thresholds use FDR - confirm this is intended.
diff_signif <- allDEG %>%
  rownames_to_column("rownames") %>%
  filter(PValue < 0.01, abs(logFC) > 2) %>%
  arrange(logFC) %>%
  column_to_rownames("rownames")

# NOTE(review): write.csv() produces comma-separated output despite the
# .txt extension.
write.csv(diff_signif, file = 'diffmRNA//DIFmRNA.txt')
# Label each significant gene as up- or down-regulated ----
# rownames_to_column() turns the row names into a column named "mRNA";
# mutate() adds a new "Regulation" column: "DOWN" for negative logFC,
# "UP" otherwise.
sigmRNA <- diff_signif %>%
  rownames_to_column("mRNA") %>%
  mutate(Regulation = ifelse(logFC < 0, "DOWN", "UP")) %>%
  dplyr::select(mRNA, Regulation)

write.csv(sigmRNA, file = 'diffmRNA//sigmRNA.txt')
# Volcano plot ----
# x axis: -log10(FDR); y axis: logFC. Genes with FDR < 0.01 are highlighted:
# red for logFC > 2, sky blue for logFC < -2.
allDiff <- ordered_tags$table

# Drop rows whose FDR is exactly 0 (or missing), since -log10(0) is Inf.
# A logical mask is required here: negating a logical vector yields the
# indices 0 / -1, which removes at most the first row, or returns zero rows
# when no FDR is 0.
allDiff2 <- allDiff[!is.na(allDiff$FDR) & allDiff$FDR != 0, ]

# Compute everything that can fail before opening the PDF device, so an
# error here does not leave a device open.
xMax <- max(-log10(allDiff2$FDR)) + 1
yMax <- 12
up_genes <- allDiff2[allDiff2$FDR < 0.01 & allDiff2$logFC > 2, ]
down_genes <- allDiff2[allDiff2$FDR < 0.01 & allDiff2$logFC < (-2), ]

pdf("diffmRNA//vol.pdf", 12, 12)
# Background layer: all genes in gray.
plot(-log10(allDiff2$FDR),
     allDiff2$logFC,
     xlab = "-log10(FDR)",
     ylab = "logFC",
     main = "Volcano",
     xlim = c(0, xMax),
     ylim = c(-yMax, yMax),
     yaxs = "i", pch = 20,
     col = "gray", cex = 0.4)
# Up-regulated genes.
diffSub <- up_genes
points(-log10(diffSub$FDR),
       diffSub$logFC, pch = 20,
       col = "red", cex = 0.4)
# Down-regulated genes.
diffSub <- down_genes
points(-log10(diffSub$FDR),
       diffSub$logFC, pch = 20,
       col = "skyblue", cex = 0.4)
# Reference line at logFC = 0.
abline(h = 0, lty = 2, lwd = 3)
dev.off()
# Heatmap of the significant genes ----
library('gplots')

# Pseudo-counts stored on the DGEList `y`, restricted to the significant
# genes. drop = FALSE keeps a matrix even if only one gene is selected.
newData <- y$pseudo.counts
heatmapData <- newData[rownames(diff_signif), , drop = FALSE]
# log10 transform; the small offset avoids log10(0).
hmExp <- log10(heatmapData + 0.001)
hmMat <- as.matrix(hmExp)

# The data is prepared before the device is opened, so a failure above does
# not leave a PDF device open.
pdf(file = "diffmRNA//heatmap.pdf", width = 12, height = 15)
par(oma = c(10, 3, 3, 7))
# This call can take a long time to run on large matrices.
heatmap.2(hmMat,
          col = 'bluered', trace = "none")
dev.off()