加权共表达分析总结

经典的WGCNA分析
用于疾病两组比较的MEGENA
用于两组处理比较的DGCA
整合工具CEMiTool
CEMiTool的一段代码如下:

## ----style, echo=FALSE, results="asis", message=FALSE--------------------
knitr::opts_chunk$set(tidy = FALSE,
                      warning = FALSE,
                      message = FALSE,
                      cache=TRUE)

## ------------------------------------------------------------------------
BiocManager::install("CEMiTool")
library("CEMiTool")
# load expression data
data(expr0)
head(expr0[,1:4])

## ---- results='hide'-----------------------------------------------------
cem <- cemitool(expr0)

## ------------------------------------------------------------------------
print(cem)
# or, more conveniently, just call `cem`
cem

## ------------------------------------------------------------------------
# inspect modules
nmodules(cem)
head(module_genes(cem))

## ---- eval=FALSE---------------------------------------------------------
#  generate_report(cem, output_format=c("pdf_document", "html_document"))

## ------------------------------------------------------------------------
write_files(cem, directory="./Tables", force=TRUE)

## ------------------------------------------------------------------------
# load your sample annotation data
data(sample_annot)
head(sample_annot)

## ---- results='hide'-----------------------------------------------------
# run cemitool with sample annotation
cem <- cemitool(expr0,sample_annot)

## ------------------------------------------------------------------------
sample_annotation(cem, 
                  sample_name_column="SampleName", 
                  class_column="Class") <- sample_annot

## ------------------------------------------------------------------------
# generate heatmap of gene set enrichment analysis
cem <- mod_gsea(cem)
cem <- plot_gsea(cem)
show_plot(cem, "gsea")

## ------------------------------------------------------------------------
# plot gene expression within each module
cem <- plot_profile(cem)
plots <- show_plot(cem, "profile")
plots[1]

## ------------------------------------------------------------------------
# read GMT file
gmt_fname <- system.file("extdata", "pathways.gmt", package = "CEMiTool")
gmt_in <- read_gmt(gmt_fname)

## ------------------------------------------------------------------------
# perform over representation analysis
cem <- mod_ora(cem, gmt_in)

## ------------------------------------------------------------------------
# plot ora results
cem <- plot_ora(cem)
plots <- show_plot(cem, "ora")
plots[1]

## ------------------------------------------------------------------------
# read interactions
int_fname <- system.file("extdata", "interactions.tsv", package = "CEMiTool")
int_df <- read.delim(int_fname)
head(int_df)

## ------------------------------------------------------------------------
# plot interactions
library(ggplot2)
interactions_data(cem) <- int_df # add interactions
cem <- plot_interactions(cem) # generate plot
plots <- show_plot(cem, "interaction") # view the plot for the first module
plots[3]

## ---- eval=FALSE---------------------------------------------------------
#  # run cemitool
#  library(ggplot2)
#  cem <- cemitool(expr0, sample_annot, gmt_in, interactions=int_df,
#                  filter=TRUE, plot=TRUE, verbose=TRUE)
#  # create report as html document
#  generate_report(cem, directory="./Report")
#  
#  # write analysis results into files
#  write_files(cem, directory="./Tables")
#  
#  # save all plots
#  save_plots(cem, "all", directory="./Plots")

## ---- eval=FALSE---------------------------------------------------------
#  diagnostic_report(cem, directory="./Diagnostics")

MEGENA使用

#!/usr/bin/env Rscript
suppressMessages(library(MEGENA))
rm(list=ls())
FC=0
p=0.05
suppressMessages(library(tidyverse))
n.cores <- 19; # number of cores/threads to call for PCP
doPar <- T; # do we want to parallelize?
method = "pearson" # method for correlation. either pearson or spearman. 
FDR.cutoff = 0.05 # FDR threshold to define significant correlations upon shuffling samples. 
module.pval = 0.05 # module significance p-value. Recommended is 0.05. 
hub.pval = 0.05 # connectivity significance p-value based random tetrahedral networks
cor.perm = 10; # number of permutations for calculating FDRs for all correlation pairs. 
hub.perm = 100; # number of permutations for calculating connectivity significance p-value. 

dir="/data_dir/"
rt_out_dir<-paste0(dir,"rt_logFC_",as.character(FC))
dir.create(rt_out_dir,recursive = T)
setwd(rt_out_dir)
rt_DESeq2_filter_trans<-read.csv(paste0(opposite_dir,"DESeq2_filter_trans_logFC_",as.character(FC),"_padj_",as.character(p),".csv"),header=T,row.names=1)
rt_DESeq2_filter_trans<-rt_DESeq2_filter_trans[,3:ncol(rt_DESeq2_filter_trans)]
rt_info<-read.csv(paste0(opposite_dir,"DESeq2_info.csv"),header=T,row.names=1)

datExpr<-rt_DESeq2_filter_trans
sample_annot<-rt_info[,c(4,5)]
colnames(sample_annot)<-c("SampleName","Class")
ijw <- calculate.correlation(datExpr,doPerm = cor.perm)

library(parallel)
cl <- makePSOCKcluster(n.cores)
registerDoParallel(cl)

##### calculate PFN
el <- calculate.PFN(ijw[,1:3],doPar = doPar,num.cores = n.cores)

g <- graph.data.frame(el,directed = FALSE)
save.image(file=paste0(rt_out_dir,"/rt_logFC_",as.character(FC),"_1.RData"))

MEGENA.output <- do.MEGENA(g,
                           mod.pval = module.pval,hub.pval = hub.pval,remove.unsig = TRUE,
                           min.size = 10,max.size = vcount(g)/2,
                           doPar = doPar,num.cores = n.cores,n.perm = hub.perm,
                           save.output = TRUE)

annot.table=sample_annot
id.col = 1
symbol.col= 2
save.image(file=paste0(rt_out_dir,"/rt_logFC_",as.character(FC),"_2.RData"))
summary.output <- MEGENA.ModuleSummary(MEGENA.output,
                                       mod.pvalue = module.pval,hub.pvalue = hub.pval,
                                       min.size = 10,max.size = vcount(g)/2,
                                       annot.table = annot.table,id.col = id.col,symbol.col = symbol.col,
                                       output.sig = TRUE)

pnet.obj <- plot_module(output = summary.output,PFN = g,subset.module = "comp1_3",
                        layout = "kamada.kawai",label.hubs.only = FALSE,
                        gene.set = NULL,color.code =  "grey",
                        output.plot = FALSE,out.dir = "modulePlot",col.names = c("magenta","green","cyan"),label.scaleFactor = 2,
                        hubLabel.col = "black",hubLabel.sizeProp = 1,show.topn.hubs = Inf,show.legend = TRUE)
stopCluster(cl)
save.image(file=paste0(rt_out_dir,"/rt_logFC_",as.character(FC),"_all.RData"))

####Print
setwd("/rt_out_dir")
load("rt_logFC_0_2.RData")

summary.output <- MEGENA.ModuleSummary(MEGENA.output,
                                       mod.pvalue = module.pval,hub.pvalue = hub.pval,
                                       min.size = 10,max.size = vcount(g)/2,
                                       annot.table = annot.table,id.col = id.col,symbol.col = symbol.col,
                                       output.sig = TRUE)
#Get module table
module.df = module_convert_to_table(MEGENA.output,mod.pval = 0.05,
                                    hub.pval = 0.05,min.size = 10,max.size = vcount(g)/2)

library(ggplot2)
mdf= summary.output$module.table
mdf$module.id<-unlist(lapply(as.character(mdf$module.id), FUN = function(x) {return(paste0("M",strsplit(x, split = "_",fixed = T)[[1]][2]))}))
mdf$module.parent<-unlist(lapply(as.character(mdf$module.parent), FUN = function(x) {return(paste0("M",strsplit(x, split = "_",fixed = T)[[1]][2]))}))

mdf$heat.pvalue = runif(nrow(mdf),0,0.1)

sbobj = draw_sunburst_wt_fill(module.df = mdf,feat.col = "heat.pvalue",log.transform = TRUE,
                              fill.type = "continuous",
                              fill.scale = scale_fill_gradient2(low = "white",mid = "white",high = "blue",
                                                                midpoint = -log10(0.05),na.value = "white"), 
                              id.col = "module.id",parent.col = "module.parent")
sbobj

加权共表达分析总结

推荐阅读更多精彩内容