【Rna-seq 分析流程】03.1 富集分析-GO&KEGG

1. 简介

一般在获得差异基因后，可以通过 GO 或者 KEGG 对这些基因进行富集分析，其目的是找到这些基因之间的内在关系。有关联的基因可能存在与同一通路的上下游，或存在与两条相作用的通路里。因此富集分析后有利于从宏观的角度理解这些差异基因之际的关系，同时也方便发现差异基因是否与我们的研究目的一致。
富集分析的方法：
直接网站分析
网址：
GO: Gene Ontology Resource
KEGG: Kyoto Encyclopedia of Genes and Genomes
利用 R 包分析：
分析方法如下
2. 数据信息

Candidate.csv 上一步筛选出来的候选基因的DEG表格
probe.csv 第一步数据预处理时获得的探针信息，因为 clusterProfiler 分析需要使用 ENTREZ_GENE_ID
3. 思路

使用 R 包 “clusterProfiler” 进行 GO 富集分析，获得关于生物学过程（Biological process，BP），分子功能（Molecular function，MF）和细胞组分（Cellular component，CC）的信息。
将上一步的 go_result 可视化。
使用 R 包 “clusterProfiler” 进行 KEGG 富集分析。
将上一步的 kegg_result 可视化。
4. 代码

library(clusterProfiler)
library(dplyr)
library(ggplot2)
library(ggridges)
library(circlize)

probe_symbols_ENid =read.csv(file = "../00_DataPre/probe.csv")
###---- 1. GO database analysis ----
hub = merge(Candidate, probe_symbols_ENid, by ="Symbol", all.x =T)
gene_up = hub[hub$change == 'up','ENTREZ_GENE_ID'] 
gene_down = hub[hub$change == 'down','ENTREZ_GENE_ID'] 
gene_diff = c(gene_up,gene_down)


ego <- enrichGO(gene=gene_diff,
                OrgDb=org.Hs.eg.db, 
                keyType='ENTREZID', 
                ont="ALL", 
                pAdjustMethod="BH", 
                pvalueCutoff=0.05, 
                qvalueCutoff=0.2, 
                readable=TRUE) 
head(ego)
sig_ego <- filter(ego,pvalue<0.05)
write.csv(sig_ego@result,'GO_enrich_results.csv',quote = F)
bp_top5 <- sig_ego %>%
  filter(ONTOLOGY == "BP") %>%  
  arrange(pvalue) %>%           
  head(5)             
cc_top5 <- sig_ego %>%
  filter(ONTOLOGY == "CC") %>%  
  arrange(pvalue) %>%           
  head(5)  
mf_top5 <- sig_ego %>%
  filter(ONTOLOGY == "MF") %>%   
  arrange(pvalue) %>%           
  head(5)  




#(3)可视化
go_result <- sig_ego@result
go_result <- go_result[order(go_result$pvalue),]
bp_top <- go_result %>% filter(ONTOLOGY == "BP") %>% top_n(-10, pvalue)
cc_top <- go_result %>% filter(ONTOLOGY == "CC") %>% top_n(-10, pvalue)
mf_top <- go_result %>% filter(ONTOLOGY == "MF") %>% top_n(-10, pvalue)
data <- data.frame(id = c(bp_top$ID,cc_top$ID,mf_top$ID),
                   category = c(bp_top$ONTOLOGY,cc_top$ONTOLOGY,mf_top$ONTOLOGY))
rownames(data) <- data$id

data$gene_num.min <- 0
data$gene_num.rich <- c(as.numeric(unlist(lapply(bp_top$BgRatio, 
                                                 function(x) strsplit(x,"/")[[1]][1]))),
                        as.numeric(unlist(lapply(cc_top$BgRatio, 
                                                 function(x) strsplit(x,"/")[[1]][1]))),
                        as.numeric(unlist(lapply(mf_top$BgRatio, 
                                                 function(x) strsplit(x,"/")[[1]][1]))))

data$gene_num.max <- rep(max(data$gene_num.rich),dim(data)[1])


# -log10 
data$"-log10Pvalue" <- c(-log10(bp_top$pvalue),-log10(cc_top$pvalue),-log10(mf_top$pvalue))


data$rich_gene_num <- c(as.numeric(unlist(lapply(bp_top$GeneRatio,
                                                 function(x) strsplit(x,"/")[[1]][1]))),
                        as.numeric(unlist(lapply(cc_top$GeneRatio,
                                                 function(x) strsplit(x,"/")[[1]][1]))),
                        as.numeric(unlist(lapply(mf_top$GeneRatio,
                                                 function(x) strsplit(x,"/")[[1]][1]))))

data$DEGs <- c(as.numeric(unlist(lapply(bp_top$GeneRatio,
                                        function(x) strsplit(x,"/")[[1]][2]))),
               as.numeric(unlist(lapply(cc_top$GeneRatio,
                                        function(x) strsplit(x,"/")[[1]][2]))),
               as.numeric(unlist(lapply(mf_top$GeneRatio,
                                        function(x) strsplit(x,"/")[[1]][2]))))
data$rich.factor <- data$rich_gene_num/data$DEGs


colnames(data)[c(1,2)] <- c("id", "ONTOLOGY")
data$id <- factor(data$id, levels = data$id)

pdf('GO_circlize.pdf', width = 12, height = 10)
#First circle, draw ko
circle_size = unit(1, 'snpc')
circos.par(gap.degree = 0.5, start.degree = 90)
plot_data <- data[c('id', 'gene_num.min', 'gene_num.max')]

gotype <- table(data$ONTOLOGY)
ko_color <- c(rep('#FF7F50',gotype[[1]]), rep('#87CEEB',gotype[[2]]), rep('#FFFF99',gotype[[3]])) 

circos.genomicInitialize(plot_data, plotType = NULL, major.by = 1) 
circos.track(
  ylim = c(0, 1), track.height = 0.05, bg.border = NA, bg.col = ko_color,  
  panel.fun = function(x, y) {
    ylim = get.cell.meta.data('ycenter')  
    xlim = get.cell.meta.data('xcenter')
    sector.name = get.cell.meta.data('sector.index')  
    circos.axis(h = 'top', labels.cex = 0.4,labels.font = 2, labels.niceFacing = FALSE) 
    circos.text(xlim, ylim, sector.name, cex = 0.85,font = 2, niceFacing = FALSE)  
  } )

#In the second circle, draw enriched genes and enriched p-values
plot_data <- data[c('id', 'gene_num.min', 'gene_num.rich', '-log10Pvalue')]  
label_data <- data['gene_num.rich']  
p_max <- round(max(data$'-log10Pvalue')) + 1  
colorsChoice <- colorRampPalette(c('#FFA500', '#FF3030'))  
color_assign <- colorRamp2(breaks = 0:p_max, col = colorsChoice(p_max + 1))

circos.genomicTrackPlotRegion(
  plot_data, track.height = 0.08, bg.border = NA, stack = TRUE,  
  panel.fun = function(region, value, ...) {
    circos.genomicRect(region, value, col = color_assign(value[[1]]), border = NA, ...)  
    ylim = get.cell.meta.data('ycenter')  
    xlim = label_data[get.cell.meta.data('sector.index'),1] / 2
    sector.name = label_data[get.cell.meta.data('sector.index'),1]
    circos.text(xlim, ylim, sector.name, cex = 0.9,font = 2, niceFacing = FALSE)  
  } )
#Draw the number of enriched genes in the third circle
plot_data <- data[c('id', 'gene_num.min', 'rich_gene_num', '-log10Pvalue')]
label_data <- data['rich_gene_num']
p_max <- round(max(data$'-log10Pvalue')) + 1
colorsChoice <- colorRampPalette(c('#FFA500', '#FF3030'))
color_assign <- colorRamp2(breaks = 0:p_max, col = colorsChoice(p_max + 1))

circos.genomicTrackPlotRegion(
  plot_data, track.height = 0.08, bg.border = NA, stack = TRUE,
  panel.fun = function(region, value, ...) {
    circos.genomicRect(region, value, col = color_assign(value[[1]]), border = NA, ...)
    ylim = get.cell.meta.data('ycenter')
    xlim = label_data[get.cell.meta.data('sector.index'),1] / 2
    sector.name = label_data[get.cell.meta.data('sector.index'),1]
    circos.text(xlim, ylim, sector.name,cex = 0.9,font = 2, niceFacing = FALSE)
  } )


#Draw enrichment factors in the fourth circle
plot_data <- data[c('id', 'gene_num.min', 'gene_num.max', 'rich.factor')] 
label_data <- data['ONTOLOGY']  
color_assign <- c('BP' = '#FF7F50', 'CC' = '#87CEEB','MF' = '#FFFF99')
circos.genomicTrack(
  plot_data, ylim = c(0, max(plot_data$rich.factor)), track.height = 0.3, bg.col = 'gray95', bg.border = NA,  
  panel.fun = function(region, value, ...) {
    sector.name = get.cell.meta.data('sector.index')  
    circos.genomicRect(region, value, col = color_assign[label_data[sector.name,]], border = NA, ytop.column = 1, ybottom = 0, ...)
  } )

#Add Legend
category_legend <- Legend(
  labels = c('Biological Process', 'Cellular Component','Molecular Function'),
  type = 'points', pch = NA, background = c('#FF7F50', '#87CEEB','#FFFF99'), 
  labels_gp = gpar(fontsize = 8), grid_height = unit(0.5, 'cm'), grid_width = unit(0.5, 'cm'))
# updown_legend <- Legend(
#   labels = c('Up-regulated', 'Down-regulated'), 
#   type = 'points', pch = NA, background = c('#FFA500', '#FF3030'), 
#   labels_gp = gpar(fontsize = 8), grid_height = unit(0.5, 'cm'), grid_width = unit(0.5, 'cm'))
pvalue_legend <- Legend(
  col_fun = colorRamp2(round(seq(0, p_max, length.out = 6), 0), 
                       colorRampPalette(c('#FFA500', '#FF3030'))(6)),
  legend_height = unit(3, 'cm'), labels_gp = gpar(fontsize = 8), 
  title_gp = gpar(fontsize = 9), title_position = 'topleft', title = '-Log10(Pvalue)')

lgd_list_vertical <- packLegend(pvalue_legend,category_legend,direction = 'vertical')
draw(lgd_list_vertical,x = unit(16, "cm"),)#, y = unit(12, "cm"), just = "right"
circos.clear()
dev.off()


png('GO_circlize.png', width=800, height=600, units="px")
#First circle, draw ko
circle_size = unit(1, 'snpc')
circos.par(gap.degree = 0.5, start.degree = 90)
plot_data <- data[c('id', 'gene_num.min', 'gene_num.max')]

gotype <- table(data$ONTOLOGY)
ko_color <- c(rep('#FF7F50',gotype[[1]]), rep('#87CEEB',gotype[[2]]), rep('#FFFF99',gotype[[3]])) 

circos.genomicInitialize(plot_data, plotType = NULL, major.by = 1) 
circos.track(
  ylim = c(0, 1), track.height = 0.05, bg.border = NA, bg.col = ko_color,  
  panel.fun = function(x, y) {
    ylim = get.cell.meta.data('ycenter')  
    xlim = get.cell.meta.data('xcenter')
    sector.name = get.cell.meta.data('sector.index')  
    circos.axis(h = 'top', labels.cex = 0.4,labels.font = 2, labels.niceFacing = FALSE) 
    circos.text(xlim, ylim, sector.name, cex = 0.85,font = 2, niceFacing = FALSE)  
  } )

#In the second circle, draw enriched genes and enriched p-values
plot_data <- data[c('id', 'gene_num.min', 'gene_num.rich', '-log10Pvalue')]  
label_data <- data['gene_num.rich']  
p_max <- round(max(data$'-log10Pvalue')) + 1  
colorsChoice <- colorRampPalette(c('#FFA500', '#FF3030'))  
color_assign <- colorRamp2(breaks = 0:p_max, col = colorsChoice(p_max + 1))

circos.genomicTrackPlotRegion(
  plot_data, track.height = 0.08, bg.border = NA, stack = TRUE,  
  panel.fun = function(region, value, ...) {
    circos.genomicRect(region, value, col = color_assign(value[[1]]), border = NA, ...)  
    ylim = get.cell.meta.data('ycenter')  
    xlim = label_data[get.cell.meta.data('sector.index'),1] / 2
    sector.name = label_data[get.cell.meta.data('sector.index'),1]
    circos.text(xlim, ylim, sector.name, cex = 0.9,font = 2, niceFacing = FALSE)  
  } )
#Draw the number of enriched genes in the third circle
plot_data <- data[c('id', 'gene_num.min', 'rich_gene_num', '-log10Pvalue')]
label_data <- data['rich_gene_num']
p_max <- round(max(data$'-log10Pvalue')) + 1
colorsChoice <- colorRampPalette(c('#FFA500', '#FF3030'))
color_assign <- colorRamp2(breaks = 0:p_max, col = colorsChoice(p_max + 1))

circos.genomicTrackPlotRegion(
  plot_data, track.height = 0.08, bg.border = NA, stack = TRUE,
  panel.fun = function(region, value, ...) {
    circos.genomicRect(region, value, col = color_assign(value[[1]]), border = NA, ...)
    ylim = get.cell.meta.data('ycenter')
    xlim = label_data[get.cell.meta.data('sector.index'),1] / 2
    sector.name = label_data[get.cell.meta.data('sector.index'),1]
    circos.text(xlim, ylim, sector.name,cex = 0.9,font = 2, niceFacing = FALSE)
  } )


#Draw enrichment factors in the fourth circle
plot_data <- data[c('id', 'gene_num.min', 'gene_num.max', 'rich.factor')] 
label_data <- data['ONTOLOGY']  
color_assign <- c('BP' = '#FF7F50', 'CC' = '#87CEEB','MF' = '#FFFF99')
circos.genomicTrack(
  plot_data, ylim = c(0, max(plot_data$rich.factor)), track.height = 0.3, bg.col = 'gray95', bg.border = NA,  
  panel.fun = function(region, value, ...) {
    sector.name = get.cell.meta.data('sector.index')  
    circos.genomicRect(region, value, col = color_assign[label_data[sector.name,]], border = NA, ytop.column = 1, ybottom = 0, ...)
  } )

#Add Legend
category_legend <- Legend(
  labels = c('Biological Process', 'Cellular Component','Molecular Function'),
  type = 'points', pch = NA, background = c('#FF7F50', '#87CEEB','#FFFF99'), 
  labels_gp = gpar(fontsize = 8), grid_height = unit(0.5, 'cm'), grid_width = unit(0.5, 'cm'))
# updown_legend <- Legend(
#   labels = c('Up-regulated', 'Down-regulated'), 
#   type = 'points', pch = NA, background = c('#FFA500', '#FF3030'), 
#   labels_gp = gpar(fontsize = 8), grid_height = unit(0.5, 'cm'), grid_width = unit(0.5, 'cm'))
pvalue_legend <- Legend(
  col_fun = colorRamp2(round(seq(0, p_max, length.out = 6), 0), 
                       colorRampPalette(c('#FFA500', '#FF3030'))(6)),
  legend_height = unit(3, 'cm'), labels_gp = gpar(fontsize = 8), 
  title_gp = gpar(fontsize = 9), title_position = 'topleft', title = '-Log10(Pvalue)')

lgd_list_vertical <- packLegend(pvalue_legend,category_legend,direction = 'vertical')
draw(lgd_list_vertical,x = unit(16, "cm"),)#, y = unit(12, "cm"), just = "right"
circos.clear()
dev.off()


###---- 2.KEGG pathway analysis ----
kegg_enrich <- enrichKEGG(
  gene = gene_diff,
  keyType = "kegg",
  organism = "hsa", 
  pvalueCutoff = 0.2,
  pAdjustMethod = "BH",
  qvalueCutoff = 0.2
)

sig_kegg <- filter(kegg_enrich,pvalue<0.05)
write.csv(sig_kegg,'KEGG_enrich_results.csv',quote = F)
top5_kegg <- sig_kegg %>% 
  as.data.frame() %>%  
  arrange(pvalue) %>%  
  head(5)   
kegg_result <- pairwise_termsim(sig_kegg)

#可视化
kegg_emap <- emapplot(kegg_result, showCategory = 10) +
  theme_minimal() +
  labs(title = "KEGG Pathway Enrichment Map")


pdf('kegg_emap.pdf', width = 10, height = 10)
grid.draw(kegg_emap)
dev.off()


png('kegg_emap.png', width = 800, height = 600)
grid.draw(kegg_emap)
dev.off()
library(clusterProfiler)
library(dplyr)
library(ggplot2)
library(ggridges)
library(circlize)

probe_symbols_ENid =read.csv(file = "../00_DataPre/probe.csv")
###---- 1. GO database analysis ----
hub = merge(Candidate, probe_symbols_ENid, by ="Symbol", all.x =T)
gene_up = hub[hub$change == 'up','ENTREZ_GENE_ID'] 
gene_down = hub[hub$change == 'down','ENTREZ_GENE_ID'] 
gene_diff = c(gene_up,gene_down)


ego <- enrichGO(gene=gene_diff,
                OrgDb=org.Hs.eg.db, 
                keyType='ENTREZID', 
                ont="ALL", 
                pAdjustMethod="BH", 
                pvalueCutoff=0.05, 
                qvalueCutoff=0.2, 
                readable=TRUE) 
head(ego)
sig_ego <- filter(ego,pvalue<0.05)
write.csv(sig_ego@result,'GO_enrich_results.csv',quote = F)
bp_top5 <- sig_ego %>%
  filter(ONTOLOGY == "BP") %>%  
  arrange(pvalue) %>%           
  head(5)             
cc_top5 <- sig_ego %>%
  filter(ONTOLOGY == "CC") %>%  
  arrange(pvalue) %>%           
  head(5)  
mf_top5 <- sig_ego %>%
  filter(ONTOLOGY == "MF") %>%   
  arrange(pvalue) %>%           
  head(5)  




#(3)可视化
go_result <- sig_ego@result
go_result <- go_result[order(go_result$pvalue),]
bp_top <- go_result %>% filter(ONTOLOGY == "BP") %>% top_n(-10, pvalue)
cc_top <- go_result %>% filter(ONTOLOGY == "CC") %>% top_n(-10, pvalue)
mf_top <- go_result %>% filter(ONTOLOGY == "MF") %>% top_n(-10, pvalue)
data <- data.frame(id = c(bp_top$ID,cc_top$ID,mf_top$ID),
                   category = c(bp_top$ONTOLOGY,cc_top$ONTOLOGY,mf_top$ONTOLOGY))
rownames(data) <- data$id

data$gene_num.min <- 0
data$gene_num.rich <- c(as.numeric(unlist(lapply(bp_top$BgRatio, 
                                                 function(x) strsplit(x,"/")[[1]][1]))),
                        as.numeric(unlist(lapply(cc_top$BgRatio, 
                                                 function(x) strsplit(x,"/")[[1]][1]))),
                        as.numeric(unlist(lapply(mf_top$BgRatio, 
                                                 function(x) strsplit(x,"/")[[1]][1]))))

data$gene_num.max <- rep(max(data$gene_num.rich),dim(data)[1])


# -log10 
data$"-log10Pvalue" <- c(-log10(bp_top$pvalue),-log10(cc_top$pvalue),-log10(mf_top$pvalue))


data$rich_gene_num <- c(as.numeric(unlist(lapply(bp_top$GeneRatio,
                                                 function(x) strsplit(x,"/")[[1]][1]))),
                        as.numeric(unlist(lapply(cc_top$GeneRatio,
                                                 function(x) strsplit(x,"/")[[1]][1]))),
                        as.numeric(unlist(lapply(mf_top$GeneRatio,
                                                 function(x) strsplit(x,"/")[[1]][1]))))

data$DEGs <- c(as.numeric(unlist(lapply(bp_top$GeneRatio,
                                        function(x) strsplit(x,"/")[[1]][2]))),
               as.numeric(unlist(lapply(cc_top$GeneRatio,
                                        function(x) strsplit(x,"/")[[1]][2]))),
               as.numeric(unlist(lapply(mf_top$GeneRatio,
                                        function(x) strsplit(x,"/")[[1]][2]))))
data$rich.factor <- data$rich_gene_num/data$DEGs


colnames(data)[c(1,2)] <- c("id", "ONTOLOGY")
data$id <- factor(data$id, levels = data$id)

pdf('GO_circlize.pdf', width = 12, height = 10)
#First circle, draw ko
circle_size = unit(1, 'snpc')
circos.par(gap.degree = 0.5, start.degree = 90)
plot_data <- data[c('id', 'gene_num.min', 'gene_num.max')]

gotype <- table(data$ONTOLOGY)
ko_color <- c(rep('#FF7F50',gotype[[1]]), rep('#87CEEB',gotype[[2]]), rep('#FFFF99',gotype[[3]])) 

circos.genomicInitialize(plot_data, plotType = NULL, major.by = 1) 
circos.track(
  ylim = c(0, 1), track.height = 0.05, bg.border = NA, bg.col = ko_color,  
  panel.fun = function(x, y) {
    ylim = get.cell.meta.data('ycenter')  
    xlim = get.cell.meta.data('xcenter')
    sector.name = get.cell.meta.data('sector.index')  
    circos.axis(h = 'top', labels.cex = 0.4,labels.font = 2, labels.niceFacing = FALSE) 
    circos.text(xlim, ylim, sector.name, cex = 0.85,font = 2, niceFacing = FALSE)  
  } )

#In the second circle, draw enriched genes and enriched p-values
plot_data <- data[c('id', 'gene_num.min', 'gene_num.rich', '-log10Pvalue')]  
label_data <- data['gene_num.rich']  
p_max <- round(max(data$'-log10Pvalue')) + 1  
colorsChoice <- colorRampPalette(c('#FFA500', '#FF3030'))  
color_assign <- colorRamp2(breaks = 0:p_max, col = colorsChoice(p_max + 1))

circos.genomicTrackPlotRegion(
  plot_data, track.height = 0.08, bg.border = NA, stack = TRUE,  
  panel.fun = function(region, value, ...) {
    circos.genomicRect(region, value, col = color_assign(value[[1]]), border = NA, ...)  
    ylim = get.cell.meta.data('ycenter')  
    xlim = label_data[get.cell.meta.data('sector.index'),1] / 2
    sector.name = label_data[get.cell.meta.data('sector.index'),1]
    circos.text(xlim, ylim, sector.name, cex = 0.9,font = 2, niceFacing = FALSE)  
  } )
#Draw the number of enriched genes in the third circle
plot_data <- data[c('id', 'gene_num.min', 'rich_gene_num', '-log10Pvalue')]
label_data <- data['rich_gene_num']
p_max <- round(max(data$'-log10Pvalue')) + 1
colorsChoice <- colorRampPalette(c('#FFA500', '#FF3030'))
color_assign <- colorRamp2(breaks = 0:p_max, col = colorsChoice(p_max + 1))

circos.genomicTrackPlotRegion(
  plot_data, track.height = 0.08, bg.border = NA, stack = TRUE,
  panel.fun = function(region, value, ...) {
    circos.genomicRect(region, value, col = color_assign(value[[1]]), border = NA, ...)
    ylim = get.cell.meta.data('ycenter')
    xlim = label_data[get.cell.meta.data('sector.index'),1] / 2
    sector.name = label_data[get.cell.meta.data('sector.index'),1]
    circos.text(xlim, ylim, sector.name,cex = 0.9,font = 2, niceFacing = FALSE)
  } )


#Draw enrichment factors in the fourth circle
plot_data <- data[c('id', 'gene_num.min', 'gene_num.max', 'rich.factor')] 
label_data <- data['ONTOLOGY']  
color_assign <- c('BP' = '#FF7F50', 'CC' = '#87CEEB','MF' = '#FFFF99')
circos.genomicTrack(
  plot_data, ylim = c(0, max(plot_data$rich.factor)), track.height = 0.3, bg.col = 'gray95', bg.border = NA,  
  panel.fun = function(region, value, ...) {
    sector.name = get.cell.meta.data('sector.index')  
    circos.genomicRect(region, value, col = color_assign[label_data[sector.name,]], border = NA, ytop.column = 1, ybottom = 0, ...)
  } )

#Add Legend
category_legend <- Legend(
  labels = c('Biological Process', 'Cellular Component','Molecular Function'),
  type = 'points', pch = NA, background = c('#FF7F50', '#87CEEB','#FFFF99'), 
  labels_gp = gpar(fontsize = 8), grid_height = unit(0.5, 'cm'), grid_width = unit(0.5, 'cm'))
# updown_legend <- Legend(
#   labels = c('Up-regulated', 'Down-regulated'), 
#   type = 'points', pch = NA, background = c('#FFA500', '#FF3030'), 
#   labels_gp = gpar(fontsize = 8), grid_height = unit(0.5, 'cm'), grid_width = unit(0.5, 'cm'))
pvalue_legend <- Legend(
  col_fun = colorRamp2(round(seq(0, p_max, length.out = 6), 0), 
                       colorRampPalette(c('#FFA500', '#FF3030'))(6)),
  legend_height = unit(3, 'cm'), labels_gp = gpar(fontsize = 8), 
  title_gp = gpar(fontsize = 9), title_position = 'topleft', title = '-Log10(Pvalue)')

lgd_list_vertical <- packLegend(pvalue_legend,category_legend,direction = 'vertical')
draw(lgd_list_vertical,x = unit(16, "cm"),)#, y = unit(12, "cm"), just = "right"
circos.clear()
dev.off()


png('GO_circlize.png', width=800, height=600, units="px")
#First circle, draw ko
circle_size = unit(1, 'snpc')
circos.par(gap.degree = 0.5, start.degree = 90)
plot_data <- data[c('id', 'gene_num.min', 'gene_num.max')]

gotype <- table(data$ONTOLOGY)
ko_color <- c(rep('#FF7F50',gotype[[1]]), rep('#87CEEB',gotype[[2]]), rep('#FFFF99',gotype[[3]])) 

circos.genomicInitialize(plot_data, plotType = NULL, major.by = 1) 
circos.track(
  ylim = c(0, 1), track.height = 0.05, bg.border = NA, bg.col = ko_color,  
  panel.fun = function(x, y) {
    ylim = get.cell.meta.data('ycenter')  
    xlim = get.cell.meta.data('xcenter')
    sector.name = get.cell.meta.data('sector.index')  
    circos.axis(h = 'top', labels.cex = 0.4,labels.font = 2, labels.niceFacing = FALSE) 
    circos.text(xlim, ylim, sector.name, cex = 0.85,font = 2, niceFacing = FALSE)  
  } )

#In the second circle, draw enriched genes and enriched p-values
plot_data <- data[c('id', 'gene_num.min', 'gene_num.rich', '-log10Pvalue')]  
label_data <- data['gene_num.rich']  
p_max <- round(max(data$'-log10Pvalue')) + 1  
colorsChoice <- colorRampPalette(c('#FFA500', '#FF3030'))  
color_assign <- colorRamp2(breaks = 0:p_max, col = colorsChoice(p_max + 1))

circos.genomicTrackPlotRegion(
  plot_data, track.height = 0.08, bg.border = NA, stack = TRUE,  
  panel.fun = function(region, value, ...) {
    circos.genomicRect(region, value, col = color_assign(value[[1]]), border = NA, ...)  
    ylim = get.cell.meta.data('ycenter')  
    xlim = label_data[get.cell.meta.data('sector.index'),1] / 2
    sector.name = label_data[get.cell.meta.data('sector.index'),1]
    circos.text(xlim, ylim, sector.name, cex = 0.9,font = 2, niceFacing = FALSE)  
  } )
#Draw the number of enriched genes in the third circle
plot_data <- data[c('id', 'gene_num.min', 'rich_gene_num', '-log10Pvalue')]
label_data <- data['rich_gene_num']
p_max <- round(max(data$'-log10Pvalue')) + 1
colorsChoice <- colorRampPalette(c('#FFA500', '#FF3030'))
color_assign <- colorRamp2(breaks = 0:p_max, col = colorsChoice(p_max + 1))

circos.genomicTrackPlotRegion(
  plot_data, track.height = 0.08, bg.border = NA, stack = TRUE,
  panel.fun = function(region, value, ...) {
    circos.genomicRect(region, value, col = color_assign(value[[1]]), border = NA, ...)
    ylim = get.cell.meta.data('ycenter')
    xlim = label_data[get.cell.meta.data('sector.index'),1] / 2
    sector.name = label_data[get.cell.meta.data('sector.index'),1]
    circos.text(xlim, ylim, sector.name,cex = 0.9,font = 2, niceFacing = FALSE)
  } )


#Draw enrichment factors in the fourth circle
plot_data <- data[c('id', 'gene_num.min', 'gene_num.max', 'rich.factor')] 
label_data <- data['ONTOLOGY']  
color_assign <- c('BP' = '#FF7F50', 'CC' = '#87CEEB','MF' = '#FFFF99')
circos.genomicTrack(
  plot_data, ylim = c(0, max(plot_data$rich.factor)), track.height = 0.3, bg.col = 'gray95', bg.border = NA,  
  panel.fun = function(region, value, ...) {
    sector.name = get.cell.meta.data('sector.index')  
    circos.genomicRect(region, value, col = color_assign[label_data[sector.name,]], border = NA, ytop.column = 1, ybottom = 0, ...)
  } )

#Add Legend
category_legend <- Legend(
  labels = c('Biological Process', 'Cellular Component','Molecular Function'),
  type = 'points', pch = NA, background = c('#FF7F50', '#87CEEB','#FFFF99'), 
  labels_gp = gpar(fontsize = 8), grid_height = unit(0.5, 'cm'), grid_width = unit(0.5, 'cm'))
# updown_legend <- Legend(
#   labels = c('Up-regulated', 'Down-regulated'), 
#   type = 'points', pch = NA, background = c('#FFA500', '#FF3030'), 
#   labels_gp = gpar(fontsize = 8), grid_height = unit(0.5, 'cm'), grid_width = unit(0.5, 'cm'))
pvalue_legend <- Legend(
  col_fun = colorRamp2(round(seq(0, p_max, length.out = 6), 0), 
                       colorRampPalette(c('#FFA500', '#FF3030'))(6)),
  legend_height = unit(3, 'cm'), labels_gp = gpar(fontsize = 8), 
  title_gp = gpar(fontsize = 9), title_position = 'topleft', title = '-Log10(Pvalue)')

lgd_list_vertical <- packLegend(pvalue_legend,category_legend,direction = 'vertical')
draw(lgd_list_vertical,x = unit(16, "cm"),)#, y = unit(12, "cm"), just = "right"
circos.clear()
dev.off()


###---- 2.KEGG pathway analysis ----
kegg_enrich <- enrichKEGG(
  gene = gene_diff,
  keyType = "kegg",
  organism = "hsa", 
  pvalueCutoff = 0.2,
  pAdjustMethod = "BH",
  qvalueCutoff = 0.2
)

sig_kegg <- filter(kegg_enrich,pvalue<0.05)
write.csv(sig_kegg,'KEGG_enrich_results.csv',quote = F)
top5_kegg <- sig_kegg %>% 
  as.data.frame() %>%  
  arrange(pvalue) %>%  
  head(5)   
kegg_result <- pairwise_termsim(sig_kegg)

#可视化
kegg_emap <- emapplot(kegg_result, showCategory = 10) +
  theme_minimal() +
  labs(title = "KEGG Pathway Enrichment Map")


pdf('kegg_emap.pdf', width = 10, height = 10)
grid.draw(kegg_emap)
dev.off()


png('kegg_emap.png', width = 800, height = 600)
grid.draw(kegg_emap)
dev.off()
5. 结果展示

GO-Top5分析结果：
类别
BP	neutrophil chemotaxis	中性粒细胞趋化作用
BP	neutrophil migration	中性粒细胞迁移
BP	granulocyte chemotaxis	粒细胞趋化作用
BP	granulocyte migration	粒细胞迁移
BP	leukocyte chemotaxis	白细胞趋化作用
CC	collagen-containing extracellular matrix	含胶原的细胞外基质
CC	lysosomal lumen	溶酶体腔
CC	vacuolar lumen	液泡腔
CC	fibrillar collagen trimer	纤维状胶原三聚体
CC	banded collagen fibril	带状胶原纤维
MF	immune receptor activity	免疫受体活性
MF	integrin binding	整合素结合
MF	extracellular matrix structural constituent	细胞外基质结构组分
MF	RAGE receptor binding	RAGE受体结合
MF	IgG binding	IgG结合
可视化：
GO_circlize.png
KEGG top5-分析结果：
KEGG
IL-17 signaling pathway	IL-17信号通路
Platelet activation	血小板活化
Tuberculosis	结核病
Cytoskeleton in muscle cells	肌细胞骨架
Asthma	哮喘
可视化：
kegg_emap.png