自建库进行GO富集分析

library(clusterProfiler)
library(dplyr)
library(stringr)
options(stringsAsFactors = F)
##===============================STEP1:GO注释生成=======================
#自己构建的话,首先需要读入文件
egg <- read.delim("galili.eggnog.annotation",header = T,sep="\t")
egg[egg==""]<-NA  #将空行变成NA,方便下面的去除
#从文件中挑出基因query与eggnog注释信息
##gene_info <- egg %>% 
##  dplyr::select(GID = query, GENENAME = eggNOG_OGs) %>% na.omit()
#挑出query_name与GO注释信息
gterms <- egg %>%
  dplyr::select(query, GOs) %>% na.omit()
gene_ids <- egg$query
eggnog_lines_with_go <- egg$GOs!= ""
eggnog_lines_with_go
eggnog_annoations_go <- str_split(egg[eggnog_lines_with_go,]$GOs, ",")
gene2go <- data.frame(gene = rep(gene_ids[eggnog_lines_with_go],
                                 times = sapply(eggnog_annoations_go, length)),
                      term = unlist(eggnog_annoations_go))
names(gene2go) <- c('gene_id', 'ID')
go2name <- read.delim('GO.library', header = FALSE, stringsAsFactors = FALSE)
names(go2name) <- c('ID', 'Description', 'Ontology')

go_anno <- merge(gene2go, go2name, by = 'ID', all.x = TRUE)

## 将GO注释信息保存
save(go_anno,file = "spalax2Rabbit.NoJac.InputGene_GO.rda")

##===============================STEP3:GO富集分析=================
#目标基因列表(全部基因)
gene_select <- read.delim(file = 'gene.name.txt', stringsAsFactors = FALSE,header = F)$V1
#GO 富集分析
#默认以所有注释到 GO 的基因为背景集,也可通过 universe 参数输入背景集
#默认以 p<0.05 为标准,Benjamini 方法校正 p 值,q 值阈值 0.2
#默认输出 top500 富集结果
#如果想输出所有富集结果(不考虑 p 值阈值等),将 p、q 等值设置为 1 即可
#或者直接在 enrichResult 类对象中直接提取需要的结果
go_rich <- enricher(gene = gene_select,
                    TERM2GENE = go_anno[c('ID', 'gene_id')], 
                    TERM2NAME = go_anno[c('ID', 'Description')], 
                    pvalueCutoff = 0.5, 
                    pAdjustMethod = 'BH', 
                    qvalueCutoff = 1
)
#输出默认结果,即根据上述 p 值等阈值筛选后的

write.csv(go_rich, "dqj.sv.GO.v1.csv")

tmp <- merge(go_rich, go2name[c('ID', 'Ontology')], by = 'ID')
tmp <- tmp[c(10, 1:9)]
tmp <- tmp[order(tmp$pvalue), ]
write.table(tmp, 'dqj.sv.GO.v2.csv', sep = ',', row.names = FALSE, quote = FALSE)

write.table(tmp, '盲鼹鼠正选择的基因.GO.csv', sep = ',', row.names = FALSE, quote = FALSE)

©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

推荐阅读更多精彩内容