【Rna-seq 分析流程】03.1 富集分析-GO&KEGG

1. 简介

一般在获得差异基因后,可以通过 GO 或者 KEGG 对这些基因进行富集分析,其目的是找到这些基因之间的内在关系。有关联的基因可能存在与同一通路的上下游,或存在与两条相作用的通路里。因此富集分析后有利于从宏观的角度理解这些差异基因之际的关系,同时也方便发现差异基因是否与我们的研究目的一致。

富集分析的方法:

  1. 直接网站分析
    网址:
    GO: Gene Ontology Resource
    KEGG: Kyoto Encyclopedia of Genes and Genomes

  2. 利用 R 包分析:
    分析方法如下


2. 数据信息

  1. Candidate.csv 上一步筛选出来的候选基因的DEG表格
  2. probe.csv 第一步数据预处理时获得的探针信息,因为 clusterProfiler 分析需要使用 ENTREZ_GENE_ID

3. 思路

  1. 使用 R 包 “clusterProfiler” 进行 GO 富集分析,获得关于生物学过程(Biological process,BP),分子功能(Molecular function,MF)和细胞组分(Cellular component,CC)的信息。
  2. 将上一步的 go_result 可视化。
  3. 使用 R 包 “clusterProfiler” 进行 KEGG 富集分析。
  4. 将上一步的 kegg_result 可视化。

4. 代码

library(clusterProfiler)
library(dplyr)
library(ggplot2)
library(ggridges)
library(circlize)

probe_symbols_ENid =read.csv(file = "../00_DataPre/probe.csv")
###---- 1. GO database analysis ----
hub = merge(Candidate, probe_symbols_ENid, by ="Symbol", all.x =T)
gene_up = hub[hub$change == 'up','ENTREZ_GENE_ID'] 
gene_down = hub[hub$change == 'down','ENTREZ_GENE_ID'] 
gene_diff = c(gene_up,gene_down)


ego <- enrichGO(gene=gene_diff,
                OrgDb=org.Hs.eg.db, 
                keyType='ENTREZID', 
                ont="ALL", 
                pAdjustMethod="BH", 
                pvalueCutoff=0.05, 
                qvalueCutoff=0.2, 
                readable=TRUE) 
head(ego)
sig_ego <- filter(ego,pvalue<0.05)
write.csv(sig_ego@result,'GO_enrich_results.csv',quote = F)
bp_top5 <- sig_ego %>%
  filter(ONTOLOGY == "BP") %>%  
  arrange(pvalue) %>%           
  head(5)             
cc_top5 <- sig_ego %>%
  filter(ONTOLOGY == "CC") %>%  
  arrange(pvalue) %>%           
  head(5)  
mf_top5 <- sig_ego %>%
  filter(ONTOLOGY == "MF") %>%   
  arrange(pvalue) %>%           
  head(5)  




#(3)可视化
go_result <- sig_ego@result
go_result <- go_result[order(go_result$pvalue),]
bp_top <- go_result %>% filter(ONTOLOGY == "BP") %>% top_n(-10, pvalue)
cc_top <- go_result %>% filter(ONTOLOGY == "CC") %>% top_n(-10, pvalue)
mf_top <- go_result %>% filter(ONTOLOGY == "MF") %>% top_n(-10, pvalue)
data <- data.frame(id = c(bp_top$ID,cc_top$ID,mf_top$ID),
                   category = c(bp_top$ONTOLOGY,cc_top$ONTOLOGY,mf_top$ONTOLOGY))
rownames(data) <- data$id

data$gene_num.min <- 0
data$gene_num.rich <- c(as.numeric(unlist(lapply(bp_top$BgRatio, 
                                                 function(x) strsplit(x,"/")[[1]][1]))),
                        as.numeric(unlist(lapply(cc_top$BgRatio, 
                                                 function(x) strsplit(x,"/")[[1]][1]))),
                        as.numeric(unlist(lapply(mf_top$BgRatio, 
                                                 function(x) strsplit(x,"/")[[1]][1]))))

data$gene_num.max <- rep(max(data$gene_num.rich),dim(data)[1])


# -log10 
data$"-log10Pvalue" <- c(-log10(bp_top$pvalue),-log10(cc_top$pvalue),-log10(mf_top$pvalue))


data$rich_gene_num <- c(as.numeric(unlist(lapply(bp_top$GeneRatio,
                                                 function(x) strsplit(x,"/")[[1]][1]))),
                        as.numeric(unlist(lapply(cc_top$GeneRatio,
                                                 function(x) strsplit(x,"/")[[1]][1]))),
                        as.numeric(unlist(lapply(mf_top$GeneRatio,
                                                 function(x) strsplit(x,"/")[[1]][1]))))

data$DEGs <- c(as.numeric(unlist(lapply(bp_top$GeneRatio,
                                        function(x) strsplit(x,"/")[[1]][2]))),
               as.numeric(unlist(lapply(cc_top$GeneRatio,
                                        function(x) strsplit(x,"/")[[1]][2]))),
               as.numeric(unlist(lapply(mf_top$GeneRatio,
                                        function(x) strsplit(x,"/")[[1]][2]))))
data$rich.factor <- data$rich_gene_num/data$DEGs


colnames(data)[c(1,2)] <- c("id", "ONTOLOGY")
data$id <- factor(data$id, levels = data$id)

pdf('GO_circlize.pdf', width = 12, height = 10)
#First circle, draw ko
circle_size = unit(1, 'snpc')
circos.par(gap.degree = 0.5, start.degree = 90)
plot_data <- data[c('id', 'gene_num.min', 'gene_num.max')]

gotype <- table(data$ONTOLOGY)
ko_color <- c(rep('#FF7F50',gotype[[1]]), rep('#87CEEB',gotype[[2]]), rep('#FFFF99',gotype[[3]])) 

circos.genomicInitialize(plot_data, plotType = NULL, major.by = 1) 
circos.track(
  ylim = c(0, 1), track.height = 0.05, bg.border = NA, bg.col = ko_color,  
  panel.fun = function(x, y) {
    ylim = get.cell.meta.data('ycenter')  
    xlim = get.cell.meta.data('xcenter')
    sector.name = get.cell.meta.data('sector.index')  
    circos.axis(h = 'top', labels.cex = 0.4,labels.font = 2, labels.niceFacing = FALSE) 
    circos.text(xlim, ylim, sector.name, cex = 0.85,font = 2, niceFacing = FALSE)  
  } )

#In the second circle, draw enriched genes and enriched p-values
plot_data <- data[c('id', 'gene_num.min', 'gene_num.rich', '-log10Pvalue')]  
label_data <- data['gene_num.rich']  
p_max <- round(max(data$'-log10Pvalue')) + 1  
colorsChoice <- colorRampPalette(c('#FFA500', '#FF3030'))  
color_assign <- colorRamp2(breaks = 0:p_max, col = colorsChoice(p_max + 1))

circos.genomicTrackPlotRegion(
  plot_data, track.height = 0.08, bg.border = NA, stack = TRUE,  
  panel.fun = function(region, value, ...) {
    circos.genomicRect(region, value, col = color_assign(value[[1]]), border = NA, ...)  
    ylim = get.cell.meta.data('ycenter')  
    xlim = label_data[get.cell.meta.data('sector.index'),1] / 2
    sector.name = label_data[get.cell.meta.data('sector.index'),1]
    circos.text(xlim, ylim, sector.name, cex = 0.9,font = 2, niceFacing = FALSE)  
  } )
#Draw the number of enriched genes in the third circle
plot_data <- data[c('id', 'gene_num.min', 'rich_gene_num', '-log10Pvalue')]
label_data <- data['rich_gene_num']
p_max <- round(max(data$'-log10Pvalue')) + 1
colorsChoice <- colorRampPalette(c('#FFA500', '#FF3030'))
color_assign <- colorRamp2(breaks = 0:p_max, col = colorsChoice(p_max + 1))

circos.genomicTrackPlotRegion(
  plot_data, track.height = 0.08, bg.border = NA, stack = TRUE,
  panel.fun = function(region, value, ...) {
    circos.genomicRect(region, value, col = color_assign(value[[1]]), border = NA, ...)
    ylim = get.cell.meta.data('ycenter')
    xlim = label_data[get.cell.meta.data('sector.index'),1] / 2
    sector.name = label_data[get.cell.meta.data('sector.index'),1]
    circos.text(xlim, ylim, sector.name,cex = 0.9,font = 2, niceFacing = FALSE)
  } )


#Draw enrichment factors in the fourth circle
plot_data <- data[c('id', 'gene_num.min', 'gene_num.max', 'rich.factor')] 
label_data <- data['ONTOLOGY']  
color_assign <- c('BP' = '#FF7F50', 'CC' = '#87CEEB','MF' = '#FFFF99')
circos.genomicTrack(
  plot_data, ylim = c(0, max(plot_data$rich.factor)), track.height = 0.3, bg.col = 'gray95', bg.border = NA,  
  panel.fun = function(region, value, ...) {
    sector.name = get.cell.meta.data('sector.index')  
    circos.genomicRect(region, value, col = color_assign[label_data[sector.name,]], border = NA, ytop.column = 1, ybottom = 0, ...)
  } )

#Add Legend
category_legend <- Legend(
  labels = c('Biological Process', 'Cellular Component','Molecular Function'),
  type = 'points', pch = NA, background = c('#FF7F50', '#87CEEB','#FFFF99'), 
  labels_gp = gpar(fontsize = 8), grid_height = unit(0.5, 'cm'), grid_width = unit(0.5, 'cm'))
# updown_legend <- Legend(
#   labels = c('Up-regulated', 'Down-regulated'), 
#   type = 'points', pch = NA, background = c('#FFA500', '#FF3030'), 
#   labels_gp = gpar(fontsize = 8), grid_height = unit(0.5, 'cm'), grid_width = unit(0.5, 'cm'))
pvalue_legend <- Legend(
  col_fun = colorRamp2(round(seq(0, p_max, length.out = 6), 0), 
                       colorRampPalette(c('#FFA500', '#FF3030'))(6)),
  legend_height = unit(3, 'cm'), labels_gp = gpar(fontsize = 8), 
  title_gp = gpar(fontsize = 9), title_position = 'topleft', title = '-Log10(Pvalue)')

lgd_list_vertical <- packLegend(pvalue_legend,category_legend,direction = 'vertical')
draw(lgd_list_vertical,x = unit(16, "cm"),)#, y = unit(12, "cm"), just = "right"
circos.clear()
dev.off()


png('GO_circlize.png', width=800, height=600, units="px")
#First circle, draw ko
circle_size = unit(1, 'snpc')
circos.par(gap.degree = 0.5, start.degree = 90)
plot_data <- data[c('id', 'gene_num.min', 'gene_num.max')]

gotype <- table(data$ONTOLOGY)
ko_color <- c(rep('#FF7F50',gotype[[1]]), rep('#87CEEB',gotype[[2]]), rep('#FFFF99',gotype[[3]])) 

circos.genomicInitialize(plot_data, plotType = NULL, major.by = 1) 
circos.track(
  ylim = c(0, 1), track.height = 0.05, bg.border = NA, bg.col = ko_color,  
  panel.fun = function(x, y) {
    ylim = get.cell.meta.data('ycenter')  
    xlim = get.cell.meta.data('xcenter')
    sector.name = get.cell.meta.data('sector.index')  
    circos.axis(h = 'top', labels.cex = 0.4,labels.font = 2, labels.niceFacing = FALSE) 
    circos.text(xlim, ylim, sector.name, cex = 0.85,font = 2, niceFacing = FALSE)  
  } )

#In the second circle, draw enriched genes and enriched p-values
plot_data <- data[c('id', 'gene_num.min', 'gene_num.rich', '-log10Pvalue')]  
label_data <- data['gene_num.rich']  
p_max <- round(max(data$'-log10Pvalue')) + 1  
colorsChoice <- colorRampPalette(c('#FFA500', '#FF3030'))  
color_assign <- colorRamp2(breaks = 0:p_max, col = colorsChoice(p_max + 1))

circos.genomicTrackPlotRegion(
  plot_data, track.height = 0.08, bg.border = NA, stack = TRUE,  
  panel.fun = function(region, value, ...) {
    circos.genomicRect(region, value, col = color_assign(value[[1]]), border = NA, ...)  
    ylim = get.cell.meta.data('ycenter')  
    xlim = label_data[get.cell.meta.data('sector.index'),1] / 2
    sector.name = label_data[get.cell.meta.data('sector.index'),1]
    circos.text(xlim, ylim, sector.name, cex = 0.9,font = 2, niceFacing = FALSE)  
  } )
#Draw the number of enriched genes in the third circle
plot_data <- data[c('id', 'gene_num.min', 'rich_gene_num', '-log10Pvalue')]
label_data <- data['rich_gene_num']
p_max <- round(max(data$'-log10Pvalue')) + 1
colorsChoice <- colorRampPalette(c('#FFA500', '#FF3030'))
color_assign <- colorRamp2(breaks = 0:p_max, col = colorsChoice(p_max + 1))

circos.genomicTrackPlotRegion(
  plot_data, track.height = 0.08, bg.border = NA, stack = TRUE,
  panel.fun = function(region, value, ...) {
    circos.genomicRect(region, value, col = color_assign(value[[1]]), border = NA, ...)
    ylim = get.cell.meta.data('ycenter')
    xlim = label_data[get.cell.meta.data('sector.index'),1] / 2
    sector.name = label_data[get.cell.meta.data('sector.index'),1]
    circos.text(xlim, ylim, sector.name,cex = 0.9,font = 2, niceFacing = FALSE)
  } )


#Draw enrichment factors in the fourth circle
plot_data <- data[c('id', 'gene_num.min', 'gene_num.max', 'rich.factor')] 
label_data <- data['ONTOLOGY']  
color_assign <- c('BP' = '#FF7F50', 'CC' = '#87CEEB','MF' = '#FFFF99')
circos.genomicTrack(
  plot_data, ylim = c(0, max(plot_data$rich.factor)), track.height = 0.3, bg.col = 'gray95', bg.border = NA,  
  panel.fun = function(region, value, ...) {
    sector.name = get.cell.meta.data('sector.index')  
    circos.genomicRect(region, value, col = color_assign[label_data[sector.name,]], border = NA, ytop.column = 1, ybottom = 0, ...)
  } )

#Add Legend
category_legend <- Legend(
  labels = c('Biological Process', 'Cellular Component','Molecular Function'),
  type = 'points', pch = NA, background = c('#FF7F50', '#87CEEB','#FFFF99'), 
  labels_gp = gpar(fontsize = 8), grid_height = unit(0.5, 'cm'), grid_width = unit(0.5, 'cm'))
# updown_legend <- Legend(
#   labels = c('Up-regulated', 'Down-regulated'), 
#   type = 'points', pch = NA, background = c('#FFA500', '#FF3030'), 
#   labels_gp = gpar(fontsize = 8), grid_height = unit(0.5, 'cm'), grid_width = unit(0.5, 'cm'))
pvalue_legend <- Legend(
  col_fun = colorRamp2(round(seq(0, p_max, length.out = 6), 0), 
                       colorRampPalette(c('#FFA500', '#FF3030'))(6)),
  legend_height = unit(3, 'cm'), labels_gp = gpar(fontsize = 8), 
  title_gp = gpar(fontsize = 9), title_position = 'topleft', title = '-Log10(Pvalue)')

lgd_list_vertical <- packLegend(pvalue_legend,category_legend,direction = 'vertical')
draw(lgd_list_vertical,x = unit(16, "cm"),)#, y = unit(12, "cm"), just = "right"
circos.clear()
dev.off()


###---- 2.KEGG pathway analysis ----
kegg_enrich <- enrichKEGG(
  gene = gene_diff,
  keyType = "kegg",
  organism = "hsa", 
  pvalueCutoff = 0.2,
  pAdjustMethod = "BH",
  qvalueCutoff = 0.2
)

sig_kegg <- filter(kegg_enrich,pvalue<0.05)
write.csv(sig_kegg,'KEGG_enrich_results.csv',quote = F)
top5_kegg <- sig_kegg %>% 
  as.data.frame() %>%  
  arrange(pvalue) %>%  
  head(5)   
kegg_result <- pairwise_termsim(sig_kegg)

#可视化
kegg_emap <- emapplot(kegg_result, showCategory = 10) +
  theme_minimal() +
  labs(title = "KEGG Pathway Enrichment Map")


pdf('kegg_emap.pdf', width = 10, height = 10)
grid.draw(kegg_emap)
dev.off()


png('kegg_emap.png', width = 800, height = 600)
grid.draw(kegg_emap)
dev.off()
library(clusterProfiler)
library(dplyr)
library(ggplot2)
library(ggridges)
library(circlize)

probe_symbols_ENid =read.csv(file = "../00_DataPre/probe.csv")
###---- 1. GO database analysis ----
hub = merge(Candidate, probe_symbols_ENid, by ="Symbol", all.x =T)
gene_up = hub[hub$change == 'up','ENTREZ_GENE_ID'] 
gene_down = hub[hub$change == 'down','ENTREZ_GENE_ID'] 
gene_diff = c(gene_up,gene_down)


ego <- enrichGO(gene=gene_diff,
                OrgDb=org.Hs.eg.db, 
                keyType='ENTREZID', 
                ont="ALL", 
                pAdjustMethod="BH", 
                pvalueCutoff=0.05, 
                qvalueCutoff=0.2, 
                readable=TRUE) 
head(ego)
sig_ego <- filter(ego,pvalue<0.05)
write.csv(sig_ego@result,'GO_enrich_results.csv',quote = F)
bp_top5 <- sig_ego %>%
  filter(ONTOLOGY == "BP") %>%  
  arrange(pvalue) %>%           
  head(5)             
cc_top5 <- sig_ego %>%
  filter(ONTOLOGY == "CC") %>%  
  arrange(pvalue) %>%           
  head(5)  
mf_top5 <- sig_ego %>%
  filter(ONTOLOGY == "MF") %>%   
  arrange(pvalue) %>%           
  head(5)  




#(3)可视化
go_result <- sig_ego@result
go_result <- go_result[order(go_result$pvalue),]
bp_top <- go_result %>% filter(ONTOLOGY == "BP") %>% top_n(-10, pvalue)
cc_top <- go_result %>% filter(ONTOLOGY == "CC") %>% top_n(-10, pvalue)
mf_top <- go_result %>% filter(ONTOLOGY == "MF") %>% top_n(-10, pvalue)
data <- data.frame(id = c(bp_top$ID,cc_top$ID,mf_top$ID),
                   category = c(bp_top$ONTOLOGY,cc_top$ONTOLOGY,mf_top$ONTOLOGY))
rownames(data) <- data$id

data$gene_num.min <- 0
data$gene_num.rich <- c(as.numeric(unlist(lapply(bp_top$BgRatio, 
                                                 function(x) strsplit(x,"/")[[1]][1]))),
                        as.numeric(unlist(lapply(cc_top$BgRatio, 
                                                 function(x) strsplit(x,"/")[[1]][1]))),
                        as.numeric(unlist(lapply(mf_top$BgRatio, 
                                                 function(x) strsplit(x,"/")[[1]][1]))))

data$gene_num.max <- rep(max(data$gene_num.rich),dim(data)[1])


# -log10 
data$"-log10Pvalue" <- c(-log10(bp_top$pvalue),-log10(cc_top$pvalue),-log10(mf_top$pvalue))


data$rich_gene_num <- c(as.numeric(unlist(lapply(bp_top$GeneRatio,
                                                 function(x) strsplit(x,"/")[[1]][1]))),
                        as.numeric(unlist(lapply(cc_top$GeneRatio,
                                                 function(x) strsplit(x,"/")[[1]][1]))),
                        as.numeric(unlist(lapply(mf_top$GeneRatio,
                                                 function(x) strsplit(x,"/")[[1]][1]))))

data$DEGs <- c(as.numeric(unlist(lapply(bp_top$GeneRatio,
                                        function(x) strsplit(x,"/")[[1]][2]))),
               as.numeric(unlist(lapply(cc_top$GeneRatio,
                                        function(x) strsplit(x,"/")[[1]][2]))),
               as.numeric(unlist(lapply(mf_top$GeneRatio,
                                        function(x) strsplit(x,"/")[[1]][2]))))
data$rich.factor <- data$rich_gene_num/data$DEGs


colnames(data)[c(1,2)] <- c("id", "ONTOLOGY")
data$id <- factor(data$id, levels = data$id)

pdf('GO_circlize.pdf', width = 12, height = 10)
#First circle, draw ko
circle_size = unit(1, 'snpc')
circos.par(gap.degree = 0.5, start.degree = 90)
plot_data <- data[c('id', 'gene_num.min', 'gene_num.max')]

gotype <- table(data$ONTOLOGY)
ko_color <- c(rep('#FF7F50',gotype[[1]]), rep('#87CEEB',gotype[[2]]), rep('#FFFF99',gotype[[3]])) 

circos.genomicInitialize(plot_data, plotType = NULL, major.by = 1) 
circos.track(
  ylim = c(0, 1), track.height = 0.05, bg.border = NA, bg.col = ko_color,  
  panel.fun = function(x, y) {
    ylim = get.cell.meta.data('ycenter')  
    xlim = get.cell.meta.data('xcenter')
    sector.name = get.cell.meta.data('sector.index')  
    circos.axis(h = 'top', labels.cex = 0.4,labels.font = 2, labels.niceFacing = FALSE) 
    circos.text(xlim, ylim, sector.name, cex = 0.85,font = 2, niceFacing = FALSE)  
  } )

#In the second circle, draw enriched genes and enriched p-values
plot_data <- data[c('id', 'gene_num.min', 'gene_num.rich', '-log10Pvalue')]  
label_data <- data['gene_num.rich']  
p_max <- round(max(data$'-log10Pvalue')) + 1  
colorsChoice <- colorRampPalette(c('#FFA500', '#FF3030'))  
color_assign <- colorRamp2(breaks = 0:p_max, col = colorsChoice(p_max + 1))

circos.genomicTrackPlotRegion(
  plot_data, track.height = 0.08, bg.border = NA, stack = TRUE,  
  panel.fun = function(region, value, ...) {
    circos.genomicRect(region, value, col = color_assign(value[[1]]), border = NA, ...)  
    ylim = get.cell.meta.data('ycenter')  
    xlim = label_data[get.cell.meta.data('sector.index'),1] / 2
    sector.name = label_data[get.cell.meta.data('sector.index'),1]
    circos.text(xlim, ylim, sector.name, cex = 0.9,font = 2, niceFacing = FALSE)  
  } )
#Draw the number of enriched genes in the third circle
plot_data <- data[c('id', 'gene_num.min', 'rich_gene_num', '-log10Pvalue')]
label_data <- data['rich_gene_num']
p_max <- round(max(data$'-log10Pvalue')) + 1
colorsChoice <- colorRampPalette(c('#FFA500', '#FF3030'))
color_assign <- colorRamp2(breaks = 0:p_max, col = colorsChoice(p_max + 1))

circos.genomicTrackPlotRegion(
  plot_data, track.height = 0.08, bg.border = NA, stack = TRUE,
  panel.fun = function(region, value, ...) {
    circos.genomicRect(region, value, col = color_assign(value[[1]]), border = NA, ...)
    ylim = get.cell.meta.data('ycenter')
    xlim = label_data[get.cell.meta.data('sector.index'),1] / 2
    sector.name = label_data[get.cell.meta.data('sector.index'),1]
    circos.text(xlim, ylim, sector.name,cex = 0.9,font = 2, niceFacing = FALSE)
  } )


#Draw enrichment factors in the fourth circle
plot_data <- data[c('id', 'gene_num.min', 'gene_num.max', 'rich.factor')] 
label_data <- data['ONTOLOGY']  
color_assign <- c('BP' = '#FF7F50', 'CC' = '#87CEEB','MF' = '#FFFF99')
circos.genomicTrack(
  plot_data, ylim = c(0, max(plot_data$rich.factor)), track.height = 0.3, bg.col = 'gray95', bg.border = NA,  
  panel.fun = function(region, value, ...) {
    sector.name = get.cell.meta.data('sector.index')  
    circos.genomicRect(region, value, col = color_assign[label_data[sector.name,]], border = NA, ytop.column = 1, ybottom = 0, ...)
  } )

#Add Legend
category_legend <- Legend(
  labels = c('Biological Process', 'Cellular Component','Molecular Function'),
  type = 'points', pch = NA, background = c('#FF7F50', '#87CEEB','#FFFF99'), 
  labels_gp = gpar(fontsize = 8), grid_height = unit(0.5, 'cm'), grid_width = unit(0.5, 'cm'))
# updown_legend <- Legend(
#   labels = c('Up-regulated', 'Down-regulated'), 
#   type = 'points', pch = NA, background = c('#FFA500', '#FF3030'), 
#   labels_gp = gpar(fontsize = 8), grid_height = unit(0.5, 'cm'), grid_width = unit(0.5, 'cm'))
pvalue_legend <- Legend(
  col_fun = colorRamp2(round(seq(0, p_max, length.out = 6), 0), 
                       colorRampPalette(c('#FFA500', '#FF3030'))(6)),
  legend_height = unit(3, 'cm'), labels_gp = gpar(fontsize = 8), 
  title_gp = gpar(fontsize = 9), title_position = 'topleft', title = '-Log10(Pvalue)')

lgd_list_vertical <- packLegend(pvalue_legend,category_legend,direction = 'vertical')
draw(lgd_list_vertical,x = unit(16, "cm"),)#, y = unit(12, "cm"), just = "right"
circos.clear()
dev.off()


png('GO_circlize.png', width=800, height=600, units="px")
#First circle, draw ko
circle_size = unit(1, 'snpc')
circos.par(gap.degree = 0.5, start.degree = 90)
plot_data <- data[c('id', 'gene_num.min', 'gene_num.max')]

gotype <- table(data$ONTOLOGY)
ko_color <- c(rep('#FF7F50',gotype[[1]]), rep('#87CEEB',gotype[[2]]), rep('#FFFF99',gotype[[3]])) 

circos.genomicInitialize(plot_data, plotType = NULL, major.by = 1) 
circos.track(
  ylim = c(0, 1), track.height = 0.05, bg.border = NA, bg.col = ko_color,  
  panel.fun = function(x, y) {
    ylim = get.cell.meta.data('ycenter')  
    xlim = get.cell.meta.data('xcenter')
    sector.name = get.cell.meta.data('sector.index')  
    circos.axis(h = 'top', labels.cex = 0.4,labels.font = 2, labels.niceFacing = FALSE) 
    circos.text(xlim, ylim, sector.name, cex = 0.85,font = 2, niceFacing = FALSE)  
  } )

#In the second circle, draw enriched genes and enriched p-values
plot_data <- data[c('id', 'gene_num.min', 'gene_num.rich', '-log10Pvalue')]  
label_data <- data['gene_num.rich']  
p_max <- round(max(data$'-log10Pvalue')) + 1  
colorsChoice <- colorRampPalette(c('#FFA500', '#FF3030'))  
color_assign <- colorRamp2(breaks = 0:p_max, col = colorsChoice(p_max + 1))

circos.genomicTrackPlotRegion(
  plot_data, track.height = 0.08, bg.border = NA, stack = TRUE,  
  panel.fun = function(region, value, ...) {
    circos.genomicRect(region, value, col = color_assign(value[[1]]), border = NA, ...)  
    ylim = get.cell.meta.data('ycenter')  
    xlim = label_data[get.cell.meta.data('sector.index'),1] / 2
    sector.name = label_data[get.cell.meta.data('sector.index'),1]
    circos.text(xlim, ylim, sector.name, cex = 0.9,font = 2, niceFacing = FALSE)  
  } )
#Draw the number of enriched genes in the third circle
plot_data <- data[c('id', 'gene_num.min', 'rich_gene_num', '-log10Pvalue')]
label_data <- data['rich_gene_num']
p_max <- round(max(data$'-log10Pvalue')) + 1
colorsChoice <- colorRampPalette(c('#FFA500', '#FF3030'))
color_assign <- colorRamp2(breaks = 0:p_max, col = colorsChoice(p_max + 1))

circos.genomicTrackPlotRegion(
  plot_data, track.height = 0.08, bg.border = NA, stack = TRUE,
  panel.fun = function(region, value, ...) {
    circos.genomicRect(region, value, col = color_assign(value[[1]]), border = NA, ...)
    ylim = get.cell.meta.data('ycenter')
    xlim = label_data[get.cell.meta.data('sector.index'),1] / 2
    sector.name = label_data[get.cell.meta.data('sector.index'),1]
    circos.text(xlim, ylim, sector.name,cex = 0.9,font = 2, niceFacing = FALSE)
  } )


#Draw enrichment factors in the fourth circle
plot_data <- data[c('id', 'gene_num.min', 'gene_num.max', 'rich.factor')] 
label_data <- data['ONTOLOGY']  
color_assign <- c('BP' = '#FF7F50', 'CC' = '#87CEEB','MF' = '#FFFF99')
circos.genomicTrack(
  plot_data, ylim = c(0, max(plot_data$rich.factor)), track.height = 0.3, bg.col = 'gray95', bg.border = NA,  
  panel.fun = function(region, value, ...) {
    sector.name = get.cell.meta.data('sector.index')  
    circos.genomicRect(region, value, col = color_assign[label_data[sector.name,]], border = NA, ytop.column = 1, ybottom = 0, ...)
  } )

#Add Legend
category_legend <- Legend(
  labels = c('Biological Process', 'Cellular Component','Molecular Function'),
  type = 'points', pch = NA, background = c('#FF7F50', '#87CEEB','#FFFF99'), 
  labels_gp = gpar(fontsize = 8), grid_height = unit(0.5, 'cm'), grid_width = unit(0.5, 'cm'))
# updown_legend <- Legend(
#   labels = c('Up-regulated', 'Down-regulated'), 
#   type = 'points', pch = NA, background = c('#FFA500', '#FF3030'), 
#   labels_gp = gpar(fontsize = 8), grid_height = unit(0.5, 'cm'), grid_width = unit(0.5, 'cm'))
pvalue_legend <- Legend(
  col_fun = colorRamp2(round(seq(0, p_max, length.out = 6), 0), 
                       colorRampPalette(c('#FFA500', '#FF3030'))(6)),
  legend_height = unit(3, 'cm'), labels_gp = gpar(fontsize = 8), 
  title_gp = gpar(fontsize = 9), title_position = 'topleft', title = '-Log10(Pvalue)')

lgd_list_vertical <- packLegend(pvalue_legend,category_legend,direction = 'vertical')
draw(lgd_list_vertical,x = unit(16, "cm"),)#, y = unit(12, "cm"), just = "right"
circos.clear()
dev.off()


###---- 2.KEGG pathway analysis ----
kegg_enrich <- enrichKEGG(
  gene = gene_diff,
  keyType = "kegg",
  organism = "hsa", 
  pvalueCutoff = 0.2,
  pAdjustMethod = "BH",
  qvalueCutoff = 0.2
)

sig_kegg <- filter(kegg_enrich,pvalue<0.05)
write.csv(sig_kegg,'KEGG_enrich_results.csv',quote = F)
top5_kegg <- sig_kegg %>% 
  as.data.frame() %>%  
  arrange(pvalue) %>%  
  head(5)   
kegg_result <- pairwise_termsim(sig_kegg)

#可视化
kegg_emap <- emapplot(kegg_result, showCategory = 10) +
  theme_minimal() +
  labs(title = "KEGG Pathway Enrichment Map")


pdf('kegg_emap.pdf', width = 10, height = 10)
grid.draw(kegg_emap)
dev.off()


png('kegg_emap.png', width = 800, height = 600)
grid.draw(kegg_emap)
dev.off()

5. 结果展示

GO-Top5分析结果:

类别
BP neutrophil chemotaxis 中性粒细胞趋化作用
BP neutrophil migration 中性粒细胞迁移
BP granulocyte chemotaxis 粒细胞趋化作用
BP granulocyte migration 粒细胞迁移
BP leukocyte chemotaxis 白细胞趋化作用
CC collagen-containing extracellular matrix 含胶原的细胞外基质
CC lysosomal lumen 溶酶体腔
CC vacuolar lumen 液泡腔
CC fibrillar collagen trimer 纤维状胶原三聚体
CC banded collagen fibril 带状胶原纤维
MF immune receptor activity 免疫受体活性
MF integrin binding 整合素结合
MF extracellular matrix structural constituent 细胞外基质结构组分
MF RAGE receptor binding RAGE受体结合
MF IgG binding IgG结合

可视化:


GO_circlize.png

KEGG top5-分析结果:

KEGG
IL-17 signaling pathway IL-17信号通路
Platelet activation 血小板活化
Tuberculosis 结核病
Cytoskeleton in muscle cells 肌细胞骨架
Asthma 哮喘

可视化:


kegg_emap.png
最后编辑于
©著作权归作者所有,转载或内容合作请联系作者
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。