GS-MM散点图用于筛选hub gene,但是之前的图实在是有点丑,放在文章里好像有点拉胯,所以准备重新绘制。前期过程在专题(3)中已经介绍过了,这里只有后期优化。
WGCNA(3):基因模块与性状关联识别重要基因 - 简书 (jianshu.com)
首先看一下,原图长这样,如果是颜色浅一点的module就完全看不清楚点。
0.前期准备
# Session setup: working directory, WGCNA, and multi-threading for WGCNA.
> setwd("D:/RNA-seq/WGCNA/mad0.3")
> library("WGCNA")
> options(stringsAsFactors = FALSE)
> allowWGCNAThreads()
# Restore the objects saved in the earlier steps. The two RData files are
# expected to provide datExpr, datTraits, MEs and moduleColors, all used below.
> lnames <- load(file = "WGCNA0.3-dataInput.RData")
> lnames <- load(file = "networkConstruction-stepByStep.RData")
> nGenes <- ncol(datExpr)
> nSamples <- nrow(datExpr)
# Trait of interest from datTraits; TL is chosen here.
> TL <- as.data.frame(datTraits$TL)
> names(TL) <- "TL"
# Module names (colours): eigengene column names without their first two characters.
> modNames <- substring(names(MEs), 3)
# Module membership (MM): correlation of each gene with each module eigengene,
# plus the corresponding p-values.
> geneModuleMembership <- as.data.frame(cor(datExpr, MEs, use = "p"))
> MMPvalue <- as.data.frame(
    corPvalueStudent(as.matrix(geneModuleMembership), nSamples))
> names(geneModuleMembership) <- paste0("MM", modNames)
> names(MMPvalue) <- paste0("p.MM", modNames)
# Gene significance (GS): correlation of each gene's expression with the trait,
# plus the corresponding p-values.
> geneTraitSignificance <- as.data.frame(cor(datExpr, TL, use = "p"))
> GSPvalue <- as.data.frame(
    corPvalueStudent(as.matrix(geneTraitSignificance), nSamples))
> names(geneTraitSignificance) <- paste0("GS.", names(TL))
> names(GSPvalue) <- paste0("p.GS.", names(TL))
# Module of interest. column (index of the module in modNames) and moduleGenes
# (logical mask of the genes assigned to the module) are defined here because
# the scatter plots in sections 1 and 2 already use them.
> module <- "yellow"
> column <- match(module, modNames)
> moduleGenes <- moduleColors == module
1.更改点的形状
通过 pch 参数进行调整(pch 是绘图参数而不是函数,例如 pch = 20 表示实心小圆点)
# Scatter plot of |MM| (x) against |GS| (y) for the genes of the selected module.
# pch = 20 draws small filled dots. The label strings are kept on a single line
# so that no stray line break ends up inside the axis label or title; the "\n"
# at the end of the title puts the correlation/p-value annotation that
# verboseScatterplot appends on its own line.
> verboseScatterplot(abs(geneModuleMembership[moduleGenes, column]),
    abs(geneTraitSignificance[moduleGenes, 1]),
    xlab = paste("Module Membership in", module, "module"),
    ylab = "Gene significance for TL",
    main = paste("Module membership vs. gene significance\n"),
    pch = 20)
2.加入筛选hub gene的标准线
- abline():h水平线,v垂直线,lwd设置线粗细
# Same |MM| vs. |GS| scatter plot, drawn with grey dots. The label strings are
# kept on a single line so that no stray line break ends up inside them; the
# "\n" at the end of the title puts the correlation/p-value annotation that
# verboseScatterplot appends on its own line.
> verboseScatterplot(abs(geneModuleMembership[moduleGenes, column]),
    abs(geneTraitSignificance[moduleGenes, 1]),
    xlab = paste("Module Membership in", module, "module"),
    ylab = "Gene significance for TL",
    main = paste("Module membership vs. gene significance\n"),
    pch = 20, col = "grey")
# Hub-gene cut-off lines: horizontal at GS = 0.2, vertical at MM = 0.8.
> abline(h = 0.2, v = 0.8, col = "red", lwd = 1.5)
3.区分点的颜色
这个图出来之后,我还是觉得不够清楚,点实在是太多了。是不是可以把筛选出的hub gene用鲜艳的颜色标出,其余基因用灰色?
3.1 提取感兴趣的module内所有的基因名称
# Select the module of interest: column is its index in modNames, moduleGenes
# is a logical mask over all genes (TRUE for genes assigned to the module).
> module <- "yellow"
> column <- match(module, modNames)
> moduleGenes <- moduleColors == module
> table(moduleGenes)
moduleGenes
FALSE TRUE
38899 2726
# Gene names of the module, taken directly from the column names of datExpr.
# This avoids copying the whole expression table through data.frame(), whose
# name checking could also rewrite gene IDs that are not syntactic R names.
> yellow_module <- data.frame(genename = colnames(datExpr)[moduleGenes])
3.2 筛选hub gene
# Absolute module membership (selected module's column) and absolute gene
# significance for the genes of the module.
> MM <- abs(geneModuleMembership[moduleGenes, column])
> GS <- abs(geneTraitSignificance[moduleGenes, 1])
# Two-column table (MM, GS) with the gene names as row names.
> c <- data.frame(MM = MM, GS = GS)
> rownames(c) <- yellow_module$genename
> head(c)
MM GS
evm.model.scaffold_735.38 0.3925084 0.44454629
evm.model.scaffold_439.8 0.4592395 0.39223841
evm.model.scaffold_9970.56 0.3424278 0.28887226
evm.model.scaffold_1431.175 0.4297850 0.23670725
evm.model.scaffold_248.130 0.5169041 0.33374737
evm.model.scaffold_1265.20 0.3013020 0.02636731
# Hub-gene mask: MM > 0.8 and GS > 0.2 (c$MM and c$GS already hold absolute
# values, so no further abs() is needed).
> yellow_hub <- c$MM > 0.8 & c$GS > 0.2
> table(yellow_hub)
yellow_hub
FALSE TRUE
2678 48
# Export the hub-gene rows themselves (gene names as row names, plus MM and GS),
# not the logical mask: the file is read back later and its first column is
# matched against gene names. which() drops any NA entries of the mask.
> write.csv(c[which(yellow_hub), ], "hubgene_MMGS_yellow.csv")
3.3 对基因进行分组
# Read the hub-gene table back; as the output below shows, the gene names are
# in column X, followed by MM and GS.
> yellow_hub <- read.csv("hubgene_MMGS_yellow.csv")
> head(yellow_hub)
X MM GS
1 evm.model.scaffold_759.263 0.8548075 0.6054175
2 evm.model.scaffold_644.298 0.8350027 0.5279349
3 evm.model.scaffold_2513.88 0.8436767 0.5192357
4 evm.model.scaffold_313.69 0.8499680 0.4964764
5 evm.model.scaffold_761.85 0.8757359 0.6455765
6 evm.model.scaffold_10124.197 0.8141564 0.6274983
# Flag every gene of the module: TRUE if it is among the hub genes, FALSE
# otherwise (%in% returns a logical vector). The flag is stored as is_hub so
# that base::match() is not masked by a variable of the same name.
> is_hub <- yellow_module$genename %in% yellow_hub$X
# Append the flag as the last column of the scatter-plot table.
> c$group <- is_hub
> head(c)
MM GS group
evm.model.scaffold_735.38 0.3925084 0.44454629 FALSE
evm.model.scaffold_439.8 0.4592395 0.39223841 FALSE
evm.model.scaffold_9970.56 0.3424278 0.28887226 FALSE
evm.model.scaffold_1431.175 0.4297850 0.23670725 FALSE
evm.model.scaffold_248.130 0.5169041 0.33374737 FALSE
evm.model.scaffold_1265.20 0.3013020 0.02636731 FALSE
3.4 利用ggplot2绘图
> library(ggplot2)
# Draw the MM vs. GS scatter plot into a 7 x 7 inch PDF.
> pdf("MM vs. GS_yellow_TL.pdf", width = 7, height = 7)
# Hub genes (group == TRUE) are red, all other genes grey. The colour values
# are named so the mapping stays correct even if only one group is present.
# (c(...) below still calls base c(): R skips the data frame named c when it
# looks up a function.) The cut-off lines are passed as plain parameters, so
# each is drawn once rather than once per data row.
> p <- ggplot(data = c, aes(x = MM, y = GS, color = group)) +
    geom_point(size = 1.5) +
    scale_colour_manual(values = c("FALSE" = "grey60", "TRUE" = "#DE6757")) +
    theme_bw() +
    labs(x = "Module Membership in yellow module",
         y = "Gene significance for TL",
         title = "Module membership vs. gene significance ") +
    theme(panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          axis.title.x = element_text(size = 14),
          axis.title.y = element_text(size = 14),
          axis.text = element_text(size = 12),
          axis.text.x = element_text(colour = "black"),
          axis.text.y = element_text(colour = "black"),
          plot.title = element_text(hjust = 0.5, size = 16, face = "bold"),
          plot.margin = unit(rep(2, 4), "lines"),
          legend.position = "none") +
    geom_hline(yintercept = 0.2, colour = "#5B9BD5", lwd = 1, linetype = 5) +
    geom_vline(xintercept = 0.8, colour = "#5B9BD5", lwd = 1, linetype = 5)
# Print explicitly: a ggplot object is only rendered automatically at the
# interactive prompt, so without print() the PDF would be empty when this code
# is run via source() or inside a function.
> print(p)
> dev.off()