这里是佳奥!表观调控分析的最后一篇,分析剩余RNA-Seq的数据,让我们开始吧!
1 分离差异表达基因
第十二步,RNA-gene-bed。
load(file = 'deg_output.Rdata')
library(ggpubr)
colnames(DEG_PhoKO)
DEG_PhoKO$log=log(DEG_PhoKO$baseMean+1)
DEG_PhoKO$change=ifelse(DEG_PhoKO$padj>0.05,'stable',
ifelse(DEG_PhoKO$log2FoldChange > 0,'up','down'))
table(DEG_PhoKO$change)
ggscatter(DEG_PhoKO,x="log" ,y="log2FoldChange",color = 'change')
DEG_SppsKO$log=log(DEG_SppsKO$baseMean+1)
DEG_SppsKO$change=ifelse(DEG_SppsKO$padj>0.05,'stable',
ifelse(DEG_SppsKO$log2FoldChange > 0,'up','down'))
table(DEG_SppsKO$change)
ggscatter(DEG_SppsKO,x="log" ,y="log2FoldChange",color = 'change')
library(UpSetR)
SppsKO_up=rownames(DEG_SppsKO[DEG_SppsKO$change=='up',])
SppsKO_down=rownames(DEG_SppsKO[DEG_SppsKO$change=='down',])
PhoKO_up=rownames(DEG_PhoKO[DEG_PhoKO$change=='up',])
PhoKO_down=rownames(DEG_PhoKO[DEG_PhoKO$change=='down',])
allG=unique(c(SppsKO_up,SppsKO_down,PhoKO_up,PhoKO_down))
df=data.frame(allG=allG,
SppsKO_up=as.numeric(allG %in% SppsKO_up),
SppsKO_down=as.numeric(allG %in% SppsKO_down),
PhoKO_up=as.numeric(allG %in% PhoKO_up),
PhoKO_down=as.numeric(allG %in% PhoKO_down))
upset(df)
重要的是df:一个01矩阵
我们要把之前的数据取出来:572、474、447、444等。
colnames(df)
# "SppsKO_up" "SppsKO_down" "PhoKO_up" "PhoKO_down"
##筛选出这些数据,对比一下上面的colnames就可以理解如何取值了
SppsKO_up_uniq=df[df[,2]==1 & df[,3]==0 & df[,4]==0 & df[,5]==0 ,1]
SppsKO_down_uniq=df[df[,2]==0 & df[,3]==1 & df[,4]==0 & df[,5]==0 ,1]
PhoKO_up_uniq=df[df[,2]==0 & df[,3]==0 & df[,4]==1 & df[,5]==0 ,1]
PhoKO_down_uniq=df[df[,2]==0 & df[,3]==0 & df[,4]==0 & df[,5]==1 ,1]
SppsKO_up_PhoKO_up=df[df[,2]==1 & df[,3]==0 & df[,4]==1 & df[,5]==0 ,1]
SppsKO_down_PhoKO_down=df[df[,2]==0 & df[,3]==1 & df[,4]==0 & df[,5]==1 ,1]
但我们需要的是基因的坐标文件。
##加载包,内含坐标
library(org.Dm.eg.db)
t1=toTable(org.Dm.egCHRLOC)
t2=toTable(org.Dm.egCHRLOCEND)
t3=toTable(org.Dm.egSYMBOL)
##下一步是把t1、t2、t3、merge合并,再与基因相对应,留作作业
##另一个包也可以获取坐标信息
require(TxDb.Dmelanogaster.UCSC.dm6.ensGene )
txdb <- TxDb.Dmelanogaster.UCSC.dm6.ensGene
gs=as.data.frame(genes(txdb))##获取信息表
library(org.Dm.eg.db)
t100=toTable(org.Dm.egFLYBASE)
t101=toTable(org.Dm.egSYMBOL)
t200=merge(t100,t101,by='gene_id')
colnames(gs)[6]="flybase_id"
t300=merge(t200,gs,by="flybase_id")
SppsKO_up_uniq
SppsKO_bed=t300[match(SppsKO_up_uniq[SppsKO_up_uniq %in% t300$symbol ],
t300$symbol),4:6]
最终获取.bed
2 多个基因富集分析
第十三步,annotation。
对一个基因集注释
load(file = 'deg_output.Rdata')##导入DEG_Phoko和DEG_SppsKO的Data
library(ggpubr)
colnames(DEG_PhoKO)
DEG_PhoKO$log=log(DEG_PhoKO$baseMean+1)
DEG_PhoKO$change=ifelse(DEG_PhoKO$padj>0.05,'stable',
ifelse(DEG_PhoKO$log2FoldChange > 0,'up','down'))
table(DEG_PhoKO$change)
ggscatter(DEG_PhoKO,x="log" ,y="log2FoldChange",color = 'change')
DEG_SppsKO$log=log(DEG_SppsKO$baseMean+1)
DEG_SppsKO$change=ifelse(DEG_SppsKO$padj>0.05,'stable',
ifelse(DEG_SppsKO$log2FoldChange > 0,'up','down'))
table(DEG_SppsKO$change)
ggscatter(DEG_SppsKO,x="log" ,y="log2FoldChange",color = 'change')
library(UpSetR)
SppsKO_up=rownames(DEG_SppsKO[DEG_SppsKO$change=='up',])
SppsKO_down=rownames(DEG_SppsKO[DEG_SppsKO$change=='down',])
PhoKO_up=rownames(DEG_PhoKO[DEG_PhoKO$change=='up',])
PhoKO_down=rownames(DEG_PhoKO[DEG_PhoKO$change=='down',])
allG=unique(c(SppsKO_up,SppsKO_down,PhoKO_up,PhoKO_down))
df=data.frame(allG=allG,
SppsKO_up=as.numeric(allG %in% SppsKO_up),
SppsKO_down=as.numeric(allG %in% SppsKO_down),
PhoKO_up=as.numeric(allG %in% PhoKO_up),
PhoKO_down=as.numeric(allG %in% PhoKO_down))
upset(df)
colnames(df)
# "SppsKO_up" "SppsKO_down" "PhoKO_up" "PhoKO_down"
SppsKO_up_uniq=df[df[,2]==1 & df[,3]==0 & df[,4]==0 & df[,5]==0 ,1]
SppsKO_down_uniq=df[df[,2]==0 & df[,3]==1 & df[,4]==0 & df[,5]==0 ,1]
PhoKO_up_uniq=df[df[,2]==0 & df[,3]==0 & df[,4]==1 & df[,5]==0 ,1]
PhoKO_down_uniq=df[df[,2]==0 & df[,3]==0 & df[,4]==0 & df[,5]==1 ,1]
SppsKO_up_PhoKO_up=df[df[,2]==1 & df[,3]==0 & df[,4]==1 & df[,5]==0 ,1]
SppsKO_down_PhoKO_down=df[df[,2]==0 & df[,3]==1 & df[,4]==0 & df[,5]==1 ,1]
##anno分析,对第一个基因集进行注释
rm(list = ls())
load(file = '6-genesets.Rdata')
library(ggplot2)
library(clusterProfiler)
library(org.Dm.eg.db)
##单个基因集的KEGG注释,批量注释即生产function函数kegg_anno
kegg_anno <- function(gs,pro){
#pro='SppsKO_up_uniq'
#gs=SppsKO_up_uniq
gene_up= bitr(gs, fromType = "SYMBOL",
toType = c( "ENTREZID"),
OrgDb = org.Dm.eg.db)[,2]
kk.up <- enrichKEGG(gene = gene_up,
organism = 'dme',
keyType = 'ncbi-geneid',
pvalueCutoff = 0.9,
qvalueCutoff =0.9)
head(kk.up)[,1:6]
kk=kk.up
dotplot(kk)
ggsave(paste0(pro,'_kk.png'))
kk=DOSE::setReadable(kk, OrgDb='org.Dm.eg.db',keyType='ENTREZID')
write.csv(kk@result,paste0(pro,'_kk.up.csv'))
}
l=list(SppsKO_up_uniq,SppsKO_down_uniq,
PhoKO_up_uniq, PhoKO_down_uniq,
SppsKO_up_PhoKO_up,SppsKO_down_PhoKO_down)
names(l)=c('SppsKO_up_uniq','SppsKO_down_uniq',
'PhoKO_up_uniq', 'PhoKO_down_uniq',
'SppsKO_up_PhoKO_up','SppsKO_down_PhoKO_down')
lapply(1:length(l), function(i){
kegg_anno(l[[i]],names(l)[i])
})
单个基因集_kk.png:
运行上述全部代码,对目录下6个文件进行绘图分析。
Warning messages:
1: In bitr(gs, fromType = "SYMBOL", toType = c("ENTREZID"), OrgDb = org.Dm.eg.db) :
2.25% of input gene IDs are fail to map...
2: In bitr(gs, fromType = "SYMBOL", toType = c("ENTREZID"), OrgDb = org.Dm.eg.db) :
2.97% of input gene IDs are fail to map...
3: In bitr(gs, fromType = "SYMBOL", toType = c("ENTREZID"), OrgDb = org.Dm.eg.db) :
2.91% of input gene IDs are fail to map...
4: In bitr(gs, fromType = "SYMBOL", toType = c("ENTREZID"), OrgDb = org.Dm.eg.db) :
4.22% of input gene IDs are fail to map...
5: In bitr(gs, fromType = "SYMBOL", toType = c("ENTREZID"), OrgDb = org.Dm.eg.db) :
1.5% of input gene IDs are fail to map...
6: In bitr(gs, fromType = "SYMBOL", toType = c("ENTREZID"), OrgDb = org.Dm.eg.db) :
2.37% of input gene IDs are fail to map...
生成结果如下:
如果遇到图片内容挤在一起,点击这个扫把
Clear all Plots
把plot界面还原一下,再运行一次就正常了。##用现成的函数compareCluster可以做到
library(ggplot2)
library(clusterProfiler)
library(org.Dm.eg.db)
gcSample=lapply(l, function(x){
return( bitr(x, fromType = "SYMBOL",
toType = c( "ENTREZID"),
OrgDb = org.Dm.eg.db)[,2] )
})
xx <- compareCluster(gcSample, fun="enrichKEGG",
keyType = 'ncbi-geneid',
organism="dme", pvalueCutoff=0.05)
dotplot(xx)
最后生成的结果对比:
别的分析依照GitHub代码来,如GO分析。
至此表观分析学习结束,主要就是下游的各种分析。
我们新的篇章再见!