WGCNA代码

WGCNA

rm(list = ls())

#加载R包

library(WGCNA)

library(tidyr)

library(ggplot2)

library(gridExtra)

setwd("D:\\项目探索\\水稻EDP\\文献\\WGCNA数据结果20190426")

#=================================================================#

#

# 1.导入表达数据

#

#=================================================================#

gene_exp <- read.table('D:\\项目探索\\水稻EDP\\文献\\WGCNA数据结果20190426\\DSC.txt',

                         sep = '\t',

                         header = T,

                         stringsAsFactors = F,


                         #第一列作为行名

                         row.names = 1)

gene_exp[is.na(gene_exp)] <- 0 # 将NA转换为0

# 根据方差筛选

m.vars=apply(gene_exp,1,var)

expro.upper=gene_exp[which(m.vars>quantile(m.vars, probs = seq(0, 1, 0.25))[4]),]

dim(expro.upper)

datExpr <- t(expro.upper) # 在WGCNA中需要转置表达矩阵

gsg = goodSamplesGenes(datExpr,verbose = 3)

gsg$allOK

#查看行和列

dim(datExpr)

# 聚类分析是否存在离群值

sampleTree = hclust(dist(datExpr),method = "average")

sizeGrWindow(12,9)

par(cex=0.6)

par(mar = c(0,4,2,0))

plot(sampleTree,

     main = "Sample clustering to detect outliners",

     sub = "",xlab = "",cex.lab=1.5,

     cex.axis=1.5,cex.main = 2)

# 存在离群样本时可以剔除

if(F){abline(h = 15,col = "red")

clust = cutreeStatic(sampleTree,cutHeight = 15,minSize = 10)

table(clust)

keepSamples = (clust==1)

datExpr =datExpr0[keepSamples,]

nGenes = ncol(datExpr)

nSamples = nrow(datExpr)

}

#=================================================================#

#

# 2.寻找最佳β值

#

#=================================================================#

# 选择并构建一个软阈值向量

powers = c(c(1:10),seq(from = 12,to = 20,by=2)

# 执行TOM功能

sft = pickSoftThreshold(datExpr,powerVector = powers,verbose = 5)

# 绘图

sizeGrWindow(9,5)

par(mfrow = c(1,2)) # 一页多图

cex1 = 0.9

# Scale-free topology fit index as a function of the soft-thresholding power

plot(sft$fitIndices[,1],-sign(sft$fitIndices[,3])*sft$fitIndices[,2],

xlab="Soft Threshold(power)",ylab = "Scale Free Topology Model Fit,signed R^2",type="n",

main = paste("Scale independence"))

text(sft$fitIndices[,1],-sign(sft$fitIndices[,3])*sft$fitIndices[,2],labels=powers,cex=cex1,col="red")

abline(h = 0.90,col = "red")

#  Mean connectivity as a function of the soft-thresholding power

plot(sft$fitIndices[,1],sft$fitIndices[,5],xlab="Soft Threshold (power)",ylab ="Mean Connectivity",type = "n",main = paste("Mean connectivity"))

text(sft$fitIndices[,1],sft$fitIndices[,5],labels=powers,cex = cex1,col="red")

#=================================================================#

#

# 3.一步法构建网络模块

#3.1 minModuleSize指定Module所含基因数最少

#3.2 mergeCutHeight指定合并Module的阈值

#3.3 列表net中包含了许多信息

#3.4 可以通过recutBlockwiseTrees函数来修改模块的一些参数,而不用重新计算聚类树

#

#=================================================================#

net = blockwiseModules(datExpr,power = sft$powerEstimate,

TOMType = "unsigned",minModuleSize = 30,reassignThreshold = 0,mergeCutHeight = 0.25,

numericLabels =TRUE,pamRespectsDendro = FALSE,saveTOMs = TRUE,saveTOMFileBase = "RiceTOM",

verbose = 3)

table(net$colors)

# 绘图

sizeGrWindow(12,9)

# 将标签转换为颜色

mergedColors = labels2colors(net$colors)

# 绘制树图及模块颜色图

plotDendroAndColors(net$dendrograms[[1]],mergeColors[net$blockGenes[[1]],"Module colors",dendroLabels = FALSE,hang = 0.03,addGuide = TRUE,guideHang = 0.05)

# 保存相关数据

moduleLabels = net$colors

moduleColors = labels2colors(net$colors)

MEs = net$MEs

geneTree = net$dendrograms[[1:2]] # 可能会存在基因较多,默认将基因分成了两部分

#=================================================================#

# 3.步步法构建

# 3.构建网络

# 3.1.计算相关系数

# 3.2.计算邻接矩阵

# 3.3.计算TOM矩阵

# 3.4.聚类并且划分模块

# 3.5.合并相似模块

#=================================================================#

# 计算邻接矩阵

softpower = sft$powerEstimate

adjacency = adjacency(datExpr,power = softPower)

# 通过邻接矩阵计算TOM矩阵

TOM = TOMsimilarity(adjacency)

dissTOM = 1 - TOM # 此为距离矩阵

# 通过距离矩阵进行聚类分析基因

geneTree = hclust(as.dist(dissTOM),method = "average")

sizeGrWindow(12,9)

plot(geneTree,xlab="",sub="",main="Gene clustering on TOM-based dissimilarity",labels=FALSE,hang = 0.04)

# 设置模块中基因最小数

minModuleSize = 30

# 通过动态树剪切鉴定出模块

dynamicMods = cutreeDynamic(dendro = geneTree,disM = dissTOM,deepSplit = 2,pamRespectsDendro = FALSE,minClusterSize = minModuleSize)

table(dynamicMods)

# 绘图

dynamicColors = labels2colors(dynamicMods)

table(dynamicColors)

sizeGrWindow(8,6)

plotDendroAndColors(geneTree,dynamicColors,"Dynamic Tree Cut",dendroLabels = FALSE,hang = 0.03,addGuide = TRUE,guideHang = 0.05,main = " Gene dendrogram and module colors")

# 合并表达近似的Modules

# 计算Modules的特征基因

MEList = moduleEigengenes(datExpr,color = dynamicColors)

MEs = MEList$eigengenes

# 计算Modules之间的相关性

MEDiss = 1 - cor(MEs)

# 聚类特征基因

METree = hclust(as.dist(MEDiss),method = "average")

# 绘图

sizeGrWindow(7,6)

plot(METree,main = "Clustering of eigengenes",xlab="",sub="")

# 自动合并

MEDissThres = 0.25

abline(h=MEDissThres,col = "red")

merge = mergeCloseModules(datExpr,dynamicColors,cutHeight = MEDissThres,verbose = 3)

mergedColors = merge$colors

mergedMEs = merge$newMEs

# 绘图对比合并前后模块

sizeGrWindow(12,9)

plotDendroAndColors(geneTree,cbind(dynamicColors,mergedColors),c("Dynamic Tree Cut","Merged dynamic"),dendroLabels = FALSE,hang =0.03,addGuide = TRUE,guideHang = 0.05)

# 保存相关的变量数据

moduleColors = mergedColors

# 转换为数字标签

colorOrder = c("grey",standardColors(50))

moduleLabels = match(moduleColors,colorOrder)-1

MEs = mergedMEs

#=================================================================#

#

# 3.大数据集处理

# 基本思想:使用两层聚类

#

#=================================================================#

#  Block-wise network construction and module detection

bwnet <- blockwiseModules(datExpr,maxBlockSize = 2000,   # 指定电脑最大可运行的模块

power = sft$powerEstimate,TOMtype ="unsigned",minModuleSize = 30,

reassignThreshold = 0, mergeCutHeight = 0.25,numericLabels = TRUE,

saveTOMs = TRUE,saveTOMFileBase = "RiceTOM-blockwise",

verbose = 3)

bwLabels = matchLabels(bwnet$colors,moduleLabels)

bwModuleColors = labels2colors(bwLabels)

table(bwLabels)

# 绘图

sizeGrWindow(6,6)

# block1绘图

plotDendroAndColors(bwnet$dendrograms[[1]], bwModuleColors[bwnet$blockGenes[[1]]], "Module colors", main = "Gene dendrogram and module colors in block 1", dendroLabels = FALSE, hang = 0.03, addGuide = TRUE, guideHang = 0.05)

# block2绘图

plotDendroAndColors(bwnet$dendrograms[[2]], bwModuleColors[bwnet$blockGenes[[2]]], "Module colors", main = "Gene dendrogram and module colors in block 2", dendroLabels = FALSE, hang = 0.03, addGuide = TRUE, guideHang = 0.05)

# 绘图比较single block和block-wise的区别

sizeGrWindow(12,9) 

plotDendroAndColors(geneTree, cbind(moduleColors, bwModuleColors), c("Single block", "2 blocks"), main = "Single block gene dendrogram and module colors", dendroLabels = FALSE, hang = 0.03, addGuide = TRUE, guideHang = 0.05)

# 通过Eigengenes比较single block和block-wise的区别

singleBlockMEs = moduleEigengenes(datExpr, moduleColors)$eigengenes

blockwiseMEs = moduleEigengenes(datExpr, bwModuleColors)$eigengenes

single2blockwise = match(names(singleBlockMEs), names(blockwiseMEs))

signif(diag(cor(blockwiseMEs[, single2blockwise], singleBlockMEs)), 3)

#=================================================================#

#

# 4.导入性状数据

#

#=================================================================#

traitData = read.csv("ClinicalTraits.csv")

dim(traitData) 

names(traitData)

# 与表达数据进行校准后构建性状数据框

RiceSamples = rownames(datExpr)

traitRows = match(RiceSamples, allTraits$Sample)

datTraits = allTraits[traitRows, -1]

rownames(datTraits) = allTraits[traitRows, 1]

#=================================================================#

#

# 5.模块与性状的相关关系

#

#=================================================================#

# 量化Module-trait的关系

nGenes = ncol(datExpr)

nSamples = nrow(datExpr)

MEs0 = moduleEigengenes(datExpr,moduleColors)$eigengenes

MEs = orderMEs(MEs0)

moduleTraitCor = cor(MEs,datTraits,use = "p")

moduleTraitPvalue = corPvalueStudent(moduleTrsitCor,nSamples)

# 绘图

sizeGrWindow(10,6)

textMatrix = paste(signif(moduleTraitCor, 2), "\n(", signif(moduleTraitPvalue, 1), ")", sep = "")

dim(textMatrix) = dim(moduleTraitCor)

par(mar = c(6, 8.5, 3, 3))

labeledHeatmap(Matrix = moduleTraitCor, xLabels = names(datTraits), yLabels = names(MEs), ySymbols = names(MEs), colorLabels = FALSE, colors = greenWhiteRed(50), textMatrix = textMatrix, setStdMargins = FALSE, cex.text = 0.5, zlim = c(-1,1), main = paste("Module-trait relationships"))

#=================================================================#

#

# 6.基因与模块、性状的相关关系

#

#=================================================================#

# 确定基因与模块的关系-MM

modNames = substring(names(MEs),3)  # 提取模块数字标签

geneModuleMembership = as.data.frame(cor(datExpr,MEs,use = "p"))

MMPvalue = as.data.frame(corPvalueStudent(as.matrix(geneModuleMembership), nSamples)) # 通过计算每个Module的Eigengenes与基因的关系

names(geneModuleMembership) = paste("MM", modNames, sep="")

names(MMPvalue) = paste("p.MM", modNames, sep="")

# 提取某个特定的性状

Treatment = as.data.frame(datTraits$Treatment)

names(weight) = "Treatment"

# 确定基因与性状的关系-GS

geneTraitSignificance = as.data.frame(cor(datExpr, Treatment, use = "p")) 

GSPvalue = as.data.frame(corPvalueStudent(as.matrix(geneTraitSignificance), nSamples))

names(geneTraitSignificance) = paste("GS.", names(Treatment), sep="")

names(GSPvalue) = paste("p.GS.", names(Treatment), sep="")

#=================================================================#

#

# 7.获取与性状关联度大的特定模块中GS高且MM高的基因

#

#=================================================================#

# 绘制特定模块GS和MM热图

module = "saddlebrown"

column = match(module,modNames)

moduleGenes = moduleColors==module # 获取特定列的数据,只是一种数据提取方法而已

sizeGrWindow(7, 7)

par(mfrow = c(1,1))

verboseScatterplot(abs(geneModuleMembership[moduleGenes, column]), abs(geneTraitSignificance[moduleGenes, 1]), xlab = paste("Module Membership in", module, "module"), ylab = "Gene significance for body weight", main = paste("Module membership vs. gene significance\n"), cex.main = 1.2, cex.lab = 1.2, cex.axis = 1.2, col = module)

# 获取特定模块的特定基因

probes = names(datExpr)[moduleColors==module]

# 输出总体结果数据框

geneInfo0 = data.frame(substanceBXH = probes, geneSymbol = annot$gene_symbol[probes2annot], LocusLinkID = annot$LocusLinkID[probes2annot], moduleColor = moduleColors, geneTraitSignificance, GSPvalue)

# 根据与性状显著相关进行排序

modOrder = order(-abs(cor(MEs, weight, use = "p")))

# 添加模块关系信息

for (mod in 1:ncol(geneModuleMembership)) 

{ oldNames = names(geneInfo0) geneInfo0 = data.frame(geneInfo0, geneModuleMembership[, modOrder[mod]], MMPvalue[, modOrder[mod]]); names(geneInfo0) = c(oldNames, paste("MM.", modNames[modOrder[mod]], sep=""), paste("p.MM.", modNames[modOrder[mod]], sep="")) }

# 第一根据颜色第二根据基因性状显著性进行排序

geneOrder = order(geneInfo0$moduleColor, -abs(geneInfo0$GS.weight))

geneInfo = geneInfo0[geneOrder, ]

#=================================================================#

#

# 8.进行后续分析GO功能注释等

#

#=================================================================#

# 选择感兴趣的Modules

intModules = c("brown", "red", "salmon")

for (module in intModules)

 {

 # Select module probes 

modGenes = (moduleColors==module) 

# Get their entrez ID codes 

modLLIDs = allLLIDs[modGenes]

# Write them into a file 

fileName = paste("LocusLinkIDs-", module, ".txt", sep="")

write.table(as.data.frame(modLLIDs), file = fileName, row.names = FALSE, col.names = FALSE) 

}

©著作权归作者所有,转载或内容合作请联系作者
  • 序言:七十年代末,一起剥皮案震惊了整个滨河市,随后出现的几起案子,更是在滨河造成了极大的恐慌,老刑警刘岩,带你破解...
    沈念sama阅读 220,137评论 6 511
  • 序言:滨河连续发生了三起死亡事件,死亡现场离奇诡异,居然都是意外死亡,警方通过查阅死者的电脑和手机,发现死者居然都...
    沈念sama阅读 93,824评论 3 396
  • 文/潘晓璐 我一进店门,熙熙楼的掌柜王于贵愁眉苦脸地迎上来,“玉大人,你说我怎么就摊上这事。” “怎么了?”我有些...
    开封第一讲书人阅读 166,465评论 0 357
  • 文/不坏的土叔 我叫张陵,是天一观的道长。 经常有香客问我,道长,这世上最难降的妖魔是什么? 我笑而不...
    开封第一讲书人阅读 59,131评论 1 295
  • 正文 为了忘掉前任,我火速办了婚礼,结果婚礼上,老公的妹妹穿的比我还像新娘。我一直安慰自己,他们只是感情好,可当我...
    茶点故事阅读 68,140评论 6 397
  • 文/花漫 我一把揭开白布。 她就那样静静地躺着,像睡着了一般。 火红的嫁衣衬着肌肤如雪。 梳的纹丝不乱的头发上,一...
    开封第一讲书人阅读 51,895评论 1 308
  • 那天,我揣着相机与录音,去河边找鬼。 笑死,一个胖子当着我的面吹牛,可吹牛的内容都是我干的。 我是一名探鬼主播,决...
    沈念sama阅读 40,535评论 3 420
  • 文/苍兰香墨 我猛地睁开眼,长吁一口气:“原来是场噩梦啊……” “哼!你这毒妇竟也来了?” 一声冷哼从身侧响起,我...
    开封第一讲书人阅读 39,435评论 0 276
  • 序言:老挝万荣一对情侣失踪,失踪者是张志新(化名)和其女友刘颖,没想到半个月后,有当地人在树林里发现了一具尸体,经...
    沈念sama阅读 45,952评论 1 319
  • 正文 独居荒郊野岭守林人离奇死亡,尸身上长有42处带血的脓包…… 初始之章·张勋 以下内容为张勋视角 年9月15日...
    茶点故事阅读 38,081评论 3 340
  • 正文 我和宋清朗相恋三年,在试婚纱的时候发现自己被绿了。 大学时的朋友给我发了我未婚夫和他白月光在一起吃饭的照片。...
    茶点故事阅读 40,210评论 1 352
  • 序言:一个原本活蹦乱跳的男人离奇死亡,死状恐怖,灵堂内的尸体忽然破棺而出,到底是诈尸还是另有隐情,我是刑警宁泽,带...
    沈念sama阅读 35,896评论 5 347
  • 正文 年R本政府宣布,位于F岛的核电站,受9级特大地震影响,放射性物质发生泄漏。R本人自食恶果不足惜,却给世界环境...
    茶点故事阅读 41,552评论 3 331
  • 文/蒙蒙 一、第九天 我趴在偏房一处隐蔽的房顶上张望。 院中可真热闹,春花似锦、人声如沸。这庄子的主人今日做“春日...
    开封第一讲书人阅读 32,089评论 0 23
  • 文/苍兰香墨 我抬头看了看天上的太阳。三九已至,却和暖如春,着一层夹袄步出监牢的瞬间,已是汗流浃背。 一阵脚步声响...
    开封第一讲书人阅读 33,198评论 1 272
  • 我被黑心中介骗来泰国打工, 没想到刚下飞机就差点儿被人妖公主榨干…… 1. 我叫王不留,地道东北人。 一个月前我还...
    沈念sama阅读 48,531评论 3 375
  • 正文 我出身青楼,却偏偏与公主长得像,于是被迫代替她去往敌国和亲。 传闻我的和亲对象是个残疾皇子,可洞房花烛夜当晚...
    茶点故事阅读 45,209评论 2 357

推荐阅读更多精彩内容