Mfuzz做转录变化的时间趋势分析后,对每个趋势分组挑一个代表性基因: https://cloud.tencent.com/developer/article/2018571
使用clusterProfiler进行富集分析: https://www.jianshu.com/p/d484003dced5
clusterProfiler进行GO富集去冗余: https://www.jianshu.com/p/e5dc76debde6
针对8天和16天的斑马鱼数据进行时序分析,用Mfuzz包实现;需要的数据是8天和16天的标准化counts值,以及自己选择的差异基因。
if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager")
BiocManager::install("rlang")
library(ggnewscale)
library(clusterProfiler) #用来做富集分析
library(topGO)#画GO图用的
library(pathview) #看KEGG pathway的
library(enrichplot)
library(org.Hs.eg.db)#这个包里存有人的注释文件
library(org.Dr.eg.db)#这个包里存有斑马鱼的注释文件
library(ggplot2)
library(DOSE)
library(GO.db)
library(Mfuzz)
library(limma)
library(clusterProfiler)
library(org.Hs.eg.db)
library(ggplot2)
library(ggstatsplot)
library(tidyverse)
利用GENE_ID合并,避免斑马鱼基因名字会有过多重复,all 和lian 进行合并
# Combine the expression table of all genes (day 8 and day 16) with the
# selected differential genes, joining on gene_id, and save the result as
# the input table for the time-course analysis.
all <- read.csv('T8_T16_ALL_genes_时序分析.csv')  # expression values of all genes, day 8 and day 16
lian <- read.csv('T8_t16_共同差异基因时序分析/T8_16差异基因.csv')  # differential genes of interest
TT <- merge(lian, all, by = 'gene_id')  # merge() puts the key column gene_id first

# Keep a single row per gene_id: the next step uses this column as row
# names, which must be unique. The first occurrence of each gene is kept.
TT <- TT[!duplicated(TT$gene_id), , drop = FALSE]

# row.names = FALSE keeps gene_id as the first column of the CSV; with the
# default, an extra row-index column is written in front of it and would be
# picked up as the "gene ID" when the file is read back.
write.csv(
  TT,
  "T8_t16_共同差异基因时序分析/T8_16d 差异基因1500时序分析.csv",
  row.names = FALSE
)
手动打开csv,全选所有数据,选择去除重复
# Reload the merged table, promote its first column to row names and
# convert the remaining columns into the matrix `A`.
ll <- read.csv(file = 'T8_t16_共同差异基因时序分析/T8_16d 差异基因1500时序分析.csv')
row.names(ll) <- ll[[1]]  # first column becomes the row names
df <- ll[, -1L]           # drop that first column from the data
# Data frame -> matrix (A)
A <- as.matrix(df)
去除表达量太低或者在不同时间点间变化太小的基因等步骤
# Mfuzz clusters an ExpressionSet object, so build one from the expression
# matrix first.
eset <- new("ExpressionSet", exprs = A)
# Remove genes whose standard deviation across samples is too small.
eset <- filter.std(eset, min.std = 0)
# e.g. "10818 genes excluded" in one run; the number depends on the data set.
eset
eset <- standardise(eset)
c <- 7  # number of clusters, adjust as needed
m <- mestimate(eset)  # estimate the fuzzifier m from the data
# Fix the RNG seed before clustering: without it the clusters are numbered
# differently on every run, while later steps of this script pick clusters
# by number (e.g. cluster 1).
set.seed(123)
cl <- mfuzz(eset, c = c, m = m)  # fuzzy c-means clustering
cl$size  # number of genes in each cluster
## Cluster cores.
# Membership values can also indicate the similarity of vectors to each
# other.
eset

# A-posteriori extraction of the cluster cores with acore(), using a
# minimum membership of 0.5.
cl.thres <- acore(eset, cl, min.acore = 0.5)

# Core genes of the first cluster.
a <- cl.thres[[1]]
a
# write.csv(a, "a.csv")  # optional export, e.g. to pick high-scoring genes;
#                        # see the ID conversion further below

# Genes assigned to each cluster.
table(cl$cluster)

# Genes left in each cluster after the min.acore filter.
vapply(cl.thres, nrow, integer(1))
# 1772 3113 1822 1931 2025 2659 786 2375 1640

# First rows of every cluster core.
lapply(cl.thres, function(core) head(core))
# Export the core genes of every cluster, annotated with gene symbols.
# One file "all_<i>.csv" is written per cluster with the columns ENSEMBL,
# ense, scores and SYMBOL.
for (i in seq_along(cl.thres)) {
  core <- cl.thres[[i]]

  # Skip clusters without core genes so that bitr() is never called on an
  # empty ID vector.
  if (is.null(core) || nrow(core) == 0) {
    next
  }

  # paste0() gives "all_1.csv"; the previous paste(..., sep = " ") produced
  # names with embedded spaces ("all_ 1 .csv").
  out_file <- paste0("all_", i, ".csv")

  # Build the table in memory instead of writing it to disk and reading it
  # back. The first column of an acore() element holds the gene ID and the
  # second the membership score.
  core_tbl <- data.frame(
    ENSEMBL = as.character(core[[1]]),
    ense = as.character(core[[1]]),
    scores = core[[2]],
    stringsAsFactors = FALSE
  )

  # Map Ensembl IDs to symbols; drop = FALSE keeps unmapped IDs (as NA).
  symbol_map <- bitr(
    core_tbl$ENSEMBL,
    fromType = "ENSEMBL",
    toType = c("SYMBOL"),
    OrgDb = org.Dr.eg.db,
    drop = FALSE
  )

  annotated <- merge(core_tbl, symbol_map, by = 'ENSEMBL')
  write.csv(annotated, file = out_file)
}
做图
# Plot the expression profiles of all clusters on a 3 x 3 grid and save
# them to a PDF file.
library(RColorBrewer)

# 1000-step colour ramp running from OliveDrab1 through yellow to red.
ramp_fn <- colorRampPalette(c("OliveDrab1", "Yellow", "#ff0000"))
color.2 <- ramp_fn(1000)

pdf(file = 'mfuzz_clusters_plot_7_2.pdf', width = 12, height = 7)
mfuzz.plot(
  eset,
  cl,
  mfrow = c(3, 3),
  colo = color.2,
  time.labels = colnames(eset),
  new.window = FALSE  # draw into the PDF device opened above
)
dev.off()
下一步对不同簇下基因进行富集分析
dev.new()  # reopen a plotting window

# Genes assigned to a single cluster, and the assignment of all genes.
gene_id <- cl$cluster[cl$cluster == 1]
gene_id
gene_id_2 <- cl$cluster[cl$cluster == 2]
gene_id_all <- cl$cluster

write.csv(gene_id, "gene_id.csv")
write.csv(gene_id_all, "gene_id_all.csv")

# Read the cluster-1 genes back as a two-column table. The table is NOT
# stored in a variable called `T`: that would mask the built-in alias for
# TRUE and break later calls such as factor(..., ordered = T).
cluster1_genes <- read.csv("gene_id.csv")
colnames(cluster1_genes) <- c('gene_id', 'row')

# Convert Ensembl IDs to Entrez IDs, which the enrichment functions below
# are called with.
columns(org.Dr.eg.db)
ensembl_gene_id <- cluster1_genes$gene_id
id <- bitr(
  ensembl_gene_id,
  fromType = "ENSEMBL",
  toType = c("ENTREZID"),
  OrgDb = org.Dr.eg.db,
  drop = FALSE
)
ENTREZ_ID <- id$ENTREZID  # Entrez IDs (NA where no mapping was found)
## GO enrichment: biological process (BP)
go_bp <- enrichGO(
  gene = ENTREZ_ID, OrgDb = org.Dr.eg.db, keyType = 'ENTREZID', ont = "BP",
  pAdjustMethod = "BH", pvalueCutoff = 0.05, qvalueCutoff = 0.05,
  readable = TRUE
)
dim(go_bp)
# Remove redundant GO terms.
egosimp <- simplify(go_bp, cutoff = 0.7, by = "p.adjust", select_fun = min, measure = "Wang")
dim(egosimp)
# write.csv(go_bp@result, "go_bp.csv")

## GO enrichment: cellular component (CC)
go_cc <- enrichGO(
  gene = ENTREZ_ID, OrgDb = org.Dr.eg.db, keyType = 'ENTREZID', ont = "CC",
  pAdjustMethod = "BH", pvalueCutoff = 0.05, qvalueCutoff = 0.05
)
# Export the result if needed:
# write.csv(go_bp@result, "go_bp.csv")
dim(go_cc)  ## 33 9
# Simplify the CC result itself (this previously simplified go_bp again, so
# go_ccsimp was just a copy of the simplified BP terms).
go_ccsimp <- simplify(go_cc, cutoff = 0.7, by = "p.adjust", select_fun = min, measure = "Wang")

## GO enrichment: molecular function (MF)
go_MF <- enrichGO(
  gene = ENTREZ_ID, OrgDb = org.Dr.eg.db, keyType = 'ENTREZID', ont = "MF",
  pAdjustMethod = "BH", pvalueCutoff = 0.05, qvalueCutoff = 0.05
)
# write.csv(go_MF@result, "go_mf.csv")

## GO enrichment: all three ontologies at once
go_all <- enrichGO(
  gene = ENTREZ_ID, OrgDb = org.Dr.eg.db, keyType = 'ENTREZID', ont = "ALL",
  pAdjustMethod = "BH", pvalueCutoff = 0.05, qvalueCutoff = 0.05,
  readable = TRUE
)
# NOTE(review): some clusterProfiler versions only accept BP/MF/CC results
# in simplify() - confirm that ont = "ALL" is supported by the installed one.
go_allsimp <- simplify(go_all, cutoff = 0.7, by = "p.adjust", select_fun = min, measure = "Wang")
dim(go_all)
dim(go_allsimp)

## Plots
dotplot(go_bp, showCategory = 10)
dotplot(egosimp, showCategory = 10)
# `scales` is spelled out in full; `scale =` only worked through partial
# argument matching.
dotplot(go_allsimp, title = 'Top5 GO terms of each sub-class', showCategory = 10, split = 'ONTOLOGY') +
  facet_grid(ONTOLOGY ~ ., scales = "free")
cnetplot(egosimp, showCategory = 5)
# KEGG enrichment for the same gene set.
# Look up the KEGG organism code for zebrafish.
search_kegg_organism("zebrafish", by = "common_name")
gene_kegg <- enrichKEGG(
  gene = ENTREZ_ID, organism = 'dre', keyType = 'kegg',
  pAdjustMethod = "BH", pvalueCutoff = 0.5, qvalueCutoff = 0.5,
  use_internal_data = FALSE
)
dim(gene_kegg)
dotplot(gene_kegg)
barplot(gene_kegg)
# enrichMap() has been retired; enrichplot's emapplot() is its replacement
# and works on a result that carries pairwise term similarities.
emapplot(pairwise_termsim(gene_kegg))
cnetplot(gene_kegg, showCategory = 5)
# Convert Entrez IDs into readable gene symbols.
eKEGG <- setReadable(gene_kegg, OrgDb = org.Dr.eg.db, keyType = "ENTREZID")
cnetplot(eKEGG, showCategory = 5)
3.读入gene_id_all文件差异基因,不同簇的基因汇总表,将ENSEMBL 转为EntrezID
获得做图关键的两列,ENTREZID和CLUSTER
# Read the cluster assignment of all genes, map the Ensembl IDs to Entrez
# IDs and build one Entrez ID vector per cluster (gcSample).
# The table is not stored in `T`, which would mask the built-in alias for
# TRUE.
cluster_tbl <- read.csv("gene_id_all.csv")

# The file written earlier in this script from cl$cluster carries no
# ENSEMBL / CLUSTER headers, so name the first two columns (gene ID,
# cluster number) unless the file already provides them.
if (!all(c("ENSEMBL", "CLUSTER") %in% colnames(cluster_tbl))) {
  colnames(cluster_tbl)[1:2] <- c("ENSEMBL", "CLUSTER")
}

ensembl_gene_id <- cluster_tbl$ENSEMBL
id <- bitr(
  ensembl_gene_id,
  fromType = "ENSEMBL",
  toType = c("ENTREZID"),
  OrgDb = org.Dr.eg.db,
  drop = FALSE
)
tt <- merge(cluster_tbl, id, by = 'ENSEMBL')

# drop = FALSE above keeps unmapped genes with an NA Entrez ID; remove them
# so the per-cluster gene lists contain real IDs only.
tt <- tt[!is.na(tt$ENTREZID), , drop = FALSE]

gcSample <- split(tt$ENTREZID, tt$CLUSTER)
gcSample
KEGG分析
# KEGG enrichment of every cluster, compared side by side.
YY <- compareCluster(
  gcSample,
  fun = "enrichKEGG",
  organism = "dre",
  pvalueCutoff = 0.05
)

pdf('mfuzz_clusters_plot_7_16.pdf', height = 10, width = 7)
p <- dotplot(YY, showCategory = 8, label_format = 100, font.size = 12)
# print() explicitly: a ggplot is only drawn automatically at the top level
# of an interactive session, so without it the PDF stays empty when this
# script is run through source().
print(
  p + theme(axis.text.x = element_text(
    angle = 45, vjust = 0.5, hjust = 0.5, size = 13, face = "bold"
  ))
)
dev.off()
GO分析
# GO (biological process) enrichment of every cluster, followed by removal
# of redundant terms.
xx <- compareCluster(
  gcSample,
  fun = "enrichGO",
  OrgDb = "org.Dr.eg.db",
  ont = "BP",
  pvalueCutoff = 0.01,
  qvalueCutoff = 0.05,
  pAdjustMethod = "BH"
)
dim(xx)

ego <- simplify(
  xx,
  cutoff = 0.7,
  by = "p.adjust",
  select_fun = min,
  measure = "Wang",
  semData = NULL
)
dim(ego)
GO多组分析
# Dot plot of the simplified GO terms of all clusters, saved as PDF.
pdf('mfuzz_clusters_plot_7_19.pdf', height = 10, width = 7)
p <- dotplot(ego, showCategory = 8, label_format = 100, font.size = 12)
# print() explicitly: a ggplot is only drawn automatically at the top level
# of an interactive session, so without it the PDF stays empty when this
# script is run through source().
print(
  p + theme(axis.text.x = element_text(
    angle = 45, vjust = 0.5, hjust = 0.5, size = 13, face = "bold"
  ))
)
dev.off()
改变x轴标签与x轴的角度与距离angle = 45,vjust = 0.5, hjust = 0.5
获取数据,取出每个组最富集的10个条目,存储起来# 不麻烦的吧
# Custom ggplot2 dot plot of the most enriched GO terms per cluster.
x <- ego@compareClusterResult

# The 10 terms with the smallest p-value in each cluster (ties are kept).
top_terms <- x %>%
  group_by(Cluster) %>%
  slice_min(order_by = pvalue, n = 10) %>%
  ungroup()

# Keep every row of a selected term, also for clusters where the term is
# not among the top 10.
y <- x[x$Description %in% top_terms$Description, ]

# dotplot() is hard to customise, so the data are plotted with ggplot2.
test <- as.data.frame(y)

# Fix the term order BEFORE the plot is built. The same term occurs in
# several clusters, so the levels have to be unique, and TRUE is spelled out
# because `T` is an ordinary variable that can be overwritten.
test$Description <- factor(
  test$Description,
  levels = unique(test$Description),
  ordered = TRUE
)

# Store the plot in `p` so that the variants below modify this plot rather
# than the dotplot() object created earlier.
# NOTE(review): the x axis shows Cluster but is labelled "Fold Enrichment" -
# confirm the intended label.
p <- ggplot(test, aes(x = Cluster, y = Description)) +
  geom_point(aes(color = p.adjust, size = Count)) +
  scale_color_gradient(low = "red", high = "blue") +
  xlab("Fold Enrichment") +
  theme_bw()
p

# Variant with rotated x labels and swapped axes.
p +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust = 0.5)) +
  coord_flip()

# Variant with rotated x labels, a red-to-green colour scale and a larger,
# bold legend title.
p +
  theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust = 0.5)) +
  scale_colour_gradient(low = "red", high = "green") +
  theme(legend.title = element_text(size = 15, face = 2))