经典的WGCNA分析
用于疾病两组比较的MEGENA
用于两组处理比较的DGCA
整合工具CEMiTool
CEMiTool的一段代码如下:
## ----style, echo=FALSE, results="asis", message=FALSE--------------------
knitr::opts_chunk$set(tidy = FALSE,
warning = FALSE,
message = FALSE,
cache=TRUE)
## ------------------------------------------------------------------------
BiocManager::install("CEMiTool")
library("CEMiTool")
# load expression data
data(expr0)
head(expr0[,1:4])
## ---- results='hide'-----------------------------------------------------
cem <- cemitool(expr0)
## ------------------------------------------------------------------------
print(cem)
# or, more conveniently, just call `cem`
cem
## ------------------------------------------------------------------------
# inspect modules
nmodules(cem)
head(module_genes(cem))
## ---- eval=FALSE---------------------------------------------------------
# generate_report(cem, output_format=c("pdf_document", "html_document"))
## ------------------------------------------------------------------------
write_files(cem, directory="./Tables", force=TRUE)
## ------------------------------------------------------------------------
# load your sample annotation data
data(sample_annot)
head(sample_annot)
## ---- results='hide'-----------------------------------------------------
# run cemitool with sample annotation
cem <- cemitool(expr0,sample_annot)
## ------------------------------------------------------------------------
sample_annotation(cem,
sample_name_column="SampleName",
class_column="Class") <- sample_annot
## ------------------------------------------------------------------------
# generate heatmap of gene set enrichment analysis
cem <- mod_gsea(cem)
cem <- plot_gsea(cem)
show_plot(cem, "gsea")
## ------------------------------------------------------------------------
# plot gene expression within each module
cem <- plot_profile(cem)
plots <- show_plot(cem, "profile")
plots[1]
## ------------------------------------------------------------------------
# read GMT file
gmt_fname <- system.file("extdata", "pathways.gmt", package = "CEMiTool")
gmt_in <- read_gmt(gmt_fname)
## ------------------------------------------------------------------------
# perform over representation analysis
cem <- mod_ora(cem, gmt_in)
## ------------------------------------------------------------------------
# plot ora results
cem <- plot_ora(cem)
plots <- show_plot(cem, "ora")
plots[1]
## ------------------------------------------------------------------------
# read interactions
int_fname <- system.file("extdata", "interactions.tsv", package = "CEMiTool")
int_df <- read.delim(int_fname)
head(int_df)
## ------------------------------------------------------------------------
# plot interactions
library(ggplot2)
interactions_data(cem) <- int_df # add interactions
cem <- plot_interactions(cem) # generate plot
plots <- show_plot(cem, "interaction") # view the plot for the first module
plots[3]
## ---- eval=FALSE---------------------------------------------------------
# # run cemitool
# library(ggplot2)
# cem <- cemitool(expr0, sample_annot, gmt_in, interactions=int_df,
# filter=TRUE, plot=TRUE, verbose=TRUE)
# # create report as html document
# generate_report(cem, directory="./Report")
#
# # write analysis results into files
# write_files(cem, directory="./Tables")
#
# # save all plots
# save_plots(cem, "all", directory="./Plots")
## ---- eval=FALSE---------------------------------------------------------
# diagnostic_report(cem, directory="./Diagnostics")
MEGENA使用
#!/usr/bin/env Rscript
suppressMessages(library(MEGENA))
rm(list=ls())
FC=0
p=0.05
suppressMessages(library(tidyverse))
n.cores <- 19; # number of cores/threads to call for PCP
doPar <- T; # do we want to parallelize?
method = "pearson" # method for correlation. either pearson or spearman.
FDR.cutoff = 0.05 # FDR threshold to define significant correlations upon shuffling samples.
module.pval = 0.05 # module significance p-value. Recommended is 0.05.
hub.pval = 0.05 # connectivity significance p-value based random tetrahedral networks
cor.perm = 10; # number of permutations for calculating FDRs for all correlation pairs.
hub.perm = 100; # number of permutations for calculating connectivity significance p-value.
dir="/data_dir/"
rt_out_dir<-paste0(dir,"rt_logFC_",as.character(FC))
dir.create(rt_out_dir,recursive = T)
setwd(rt_out_dir)
rt_DESeq2_filter_trans<-read.csv(paste0(opposite_dir,"DESeq2_filter_trans_logFC_",as.character(FC),"_padj_",as.character(p),".csv"),header=T,row.names=1)
rt_DESeq2_filter_trans<-rt_DESeq2_filter_trans[,3:ncol(rt_DESeq2_filter_trans)]
rt_info<-read.csv(paste0(opposite_dir,"DESeq2_info.csv"),header=T,row.names=1)
datExpr<-rt_DESeq2_filter_trans
sample_annot<-rt_info[,c(4,5)]
colnames(sample_annot)<-c("SampleName","Class")
ijw <- calculate.correlation(datExpr,doPerm = cor.perm)
library(parallel)
cl <- makePSOCKcluster(n.cores)
registerDoParallel(cl)
##### calculate PFN
el <- calculate.PFN(ijw[,1:3],doPar = doPar,num.cores = n.cores)
g <- graph.data.frame(el,directed = FALSE)
save.image(file=paste0(rt_out_dir,"/rt_logFC_",as.character(FC),"_1.RData"))
MEGENA.output <- do.MEGENA(g,
mod.pval = module.pval,hub.pval = hub.pval,remove.unsig = TRUE,
min.size = 10,max.size = vcount(g)/2,
doPar = doPar,num.cores = n.cores,n.perm = hub.perm,
save.output = TRUE)
annot.table=sample_annot
id.col = 1
symbol.col= 2
save.image(file=paste0(rt_out_dir,"/rt_logFC_",as.character(FC),"_2.RData"))
summary.output <- MEGENA.ModuleSummary(MEGENA.output,
mod.pvalue = module.pval,hub.pvalue = hub.pval,
min.size = 10,max.size = vcount(g)/2,
annot.table = annot.table,id.col = id.col,symbol.col = symbol.col,
output.sig = TRUE)
pnet.obj <- plot_module(output = summary.output,PFN = g,subset.module = "comp1_3",
layout = "kamada.kawai",label.hubs.only = FALSE,
gene.set = NULL,color.code = "grey",
output.plot = FALSE,out.dir = "modulePlot",col.names = c("magenta","green","cyan"),label.scaleFactor = 2,
hubLabel.col = "black",hubLabel.sizeProp = 1,show.topn.hubs = Inf,show.legend = TRUE)
stopCluster(cl)
save.image(file=paste0(rt_out_dir,"/rt_logFC_",as.character(FC),"_all.RData"))
####Print
setwd("/rt_out_dir")
load("rt_logFC_0_2.RData")
summary.output <- MEGENA.ModuleSummary(MEGENA.output,
mod.pvalue = module.pval,hub.pvalue = hub.pval,
min.size = 10,max.size = vcount(g)/2,
annot.table = annot.table,id.col = id.col,symbol.col = symbol.col,
output.sig = TRUE)
#Get module table
module.df = module_convert_to_table(MEGENA.output,mod.pval = 0.05,
hub.pval = 0.05,min.size = 10,max.size = vcount(g)/2)
library(ggplot2)
mdf= summary.output$module.table
mdf$module.id<-unlist(lapply(as.character(mdf$module.id), FUN = function(x) {return(paste0("M",strsplit(x, split = "_",fixed = T)[[1]][2]))}))
mdf$module.parent<-unlist(lapply(as.character(mdf$module.parent), FUN = function(x) {return(paste0("M",strsplit(x, split = "_",fixed = T)[[1]][2]))}))
mdf$heat.pvalue = runif(nrow(mdf),0,0.1)
sbobj = draw_sunburst_wt_fill(module.df = mdf,feat.col = "heat.pvalue",log.transform = TRUE,
fill.type = "continuous",
fill.scale = scale_fill_gradient2(low = "white",mid = "white",high = "blue",
midpoint = -log10(0.05),na.value = "white"),
id.col = "module.id",parent.col = "module.parent")
sbobj