# Set-up: clear the workspace, attach the packages used below, and load the
# input gene list.
# NOTE(review): rm(list = ls()) deletes every object in the current
# environment. It is generally discouraged in shared scripts; consider removing
# it and running the script in a fresh R session instead.
rm(list = ls())
library(clusterProfiler) # ID conversion and enrichment (bitr, enrichKEGG, enrichGO)
library(org.At.tair.db) # annotation database passed as OrgDb below
library(stringr) # str_remove()
library(ggplot2) # facet_grid()
# Restore the objects saved in genes.Rdata; the code below expects one of them
# to be named `genes`.
load("genes.Rdata")
# Preview the first few entries of `genes`.
head(genes)
## [1] "ATBCA3" "BCA3" "AtWSCP" "Kunitz-PI;1" "BCAT6"
## [6] "OSR1"
# 1. ID转换
# 首先要转换ID:拟南芥需要用TAIR ID来做富集分析。怎么知道的呢?
# 如果拿ENTREZID来做,会收到报错信息,从报错里可以看出应该用AT开头的ID。
# 那是啥ID呢?查一下就知道,是TAIR ID。
# Map the gene symbols to TAIR locus IDs; the KEGG step below reads x$TAIR.
x <- bitr(
  genes,
  fromType = "SYMBOL",
  toType = "TAIR",
  OrgDb = "org.At.tair.db"
)
# Preview the resulting SYMBOL -> TAIR mapping table.
head(x)
## SYMBOL TAIR
## 1 ATBCA3 AT1G23730
## 2 BCA3 AT1G23730
## 3 AtWSCP AT1G72290
## 4 Kunitz-PI;1 AT1G72290
## 5 BCAT6 AT1G50110
## 6 OSR1 AT2G41230
# 2. 做KEGG富集
# KEGG over-representation analysis on the TAIR IDs, using KEGG organism
# code "ath".
ekk <- enrichKEGG(gene = x$TAIR, organism = "ath")
# Fail early with a clear message: when nothing could be enriched, `ekk` is
# empty and the setReadable() call below would error anyway.
if (is.null(ekk)) {
  stop(
    "enrichKEGG() returned no result: no KEGG pathway was enriched ",
    "for the supplied genes.",
    call. = FALSE
  )
}
# Convert the TAIR IDs stored in the result into readable gene names.
ekk <- setReadable(ekk, OrgDb = org.At.tair.db, keyType = "TAIR")
# Bar plot of the enriched pathways.
barplot(ekk)
# 发现纵坐标的通路名称后面带有多余的物种后缀,可以在enrichResult对象里面把它删掉。
# 代码里括号前的两个反斜杠(\\)表示这个括号就是括号字符本身;
# 不加反斜杠的话,括号会被当作正则表达式里的分组符号,另有含义。
# Strip the species suffix from every pathway description so the axis labels
# stay short. The parentheses are escaped so the regex matches them literally.
ekk@result$Description <- str_remove(
  string = ekk@result$Description,
  pattern = " - Arabidopsis thaliana \\(thale cress\\)"
)
# Redraw the bar plot with the cleaned labels.
barplot(ekk)
# 改完再画,这回就好了。
# 3. GO富集分析
# 这一步比较简单,指定一下keyType参数即可。
# GO enrichment over all ontologies (ont = "ALL").
# The TAIR locus IDs from `x` are used instead of the raw symbols: the preview
# of `x` shows several symbols resolving to the same locus (e.g. ATBCA3 and
# BCA3 both map to AT1G23730), so testing with keyType = "SYMBOL" would count
# such a gene once per alias.
ego <- enrichGO(
  gene = x$TAIR,
  OrgDb = org.At.tair.db,
  keyType = "TAIR",
  ont = "ALL",
  readable = TRUE # report gene symbols rather than TAIR IDs in the result
)
# One facet per ontology; free y scale and spacing so each panel lists only its
# own terms.
barplot(ego, split = "ONTOLOGY") +
  facet_grid(ONTOLOGY ~ ., space = "free_y", scales = "free_y")