1. 简介
一般在获得差异基因后,可以通过 GO 或者 KEGG 对这些基因进行富集分析,其目的是找到这些基因之间的内在关系。有关联的基因可能存在与同一通路的上下游,或存在与两条相作用的通路里。因此富集分析后有利于从宏观的角度理解这些差异基因之际的关系,同时也方便发现差异基因是否与我们的研究目的一致。
富集分析的方法:
直接网站分析
网址:
GO: Gene Ontology Resource
KEGG: Kyoto Encyclopedia of Genes and Genomes利用 R 包分析:
分析方法如下
2. 数据信息
- Candidate.csv 上一步筛选出来的候选基因的DEG表格
- probe.csv 第一步数据预处理时获得的探针信息,因为 clusterProfiler 分析需要使用 ENTREZ_GENE_ID
3. 思路
- 使用 R 包 “clusterProfiler” 进行 GO 富集分析,获得关于生物学过程(Biological process,BP),分子功能(Molecular function,MF)和细胞组分(Cellular component,CC)的信息。
- 将上一步的 go_result 可视化。
- 使用 R 包 “clusterProfiler” 进行 KEGG 富集分析。
- 将上一步的 kegg_result 可视化。
4. 代码
library(clusterProfiler)
library(dplyr)
library(ggplot2)
library(ggridges)
library(circlize)
probe_symbols_ENid =read.csv(file = "../00_DataPre/probe.csv")
###---- 1. GO database analysis ----
hub = merge(Candidate, probe_symbols_ENid, by ="Symbol", all.x =T)
gene_up = hub[hub$change == 'up','ENTREZ_GENE_ID']
gene_down = hub[hub$change == 'down','ENTREZ_GENE_ID']
gene_diff = c(gene_up,gene_down)
ego <- enrichGO(gene=gene_diff,
OrgDb=org.Hs.eg.db,
keyType='ENTREZID',
ont="ALL",
pAdjustMethod="BH",
pvalueCutoff=0.05,
qvalueCutoff=0.2,
readable=TRUE)
head(ego)
sig_ego <- filter(ego,pvalue<0.05)
write.csv(sig_ego@result,'GO_enrich_results.csv',quote = F)
bp_top5 <- sig_ego %>%
filter(ONTOLOGY == "BP") %>%
arrange(pvalue) %>%
head(5)
cc_top5 <- sig_ego %>%
filter(ONTOLOGY == "CC") %>%
arrange(pvalue) %>%
head(5)
mf_top5 <- sig_ego %>%
filter(ONTOLOGY == "MF") %>%
arrange(pvalue) %>%
head(5)
#(3)可视化
go_result <- sig_ego@result
go_result <- go_result[order(go_result$pvalue),]
bp_top <- go_result %>% filter(ONTOLOGY == "BP") %>% top_n(-10, pvalue)
cc_top <- go_result %>% filter(ONTOLOGY == "CC") %>% top_n(-10, pvalue)
mf_top <- go_result %>% filter(ONTOLOGY == "MF") %>% top_n(-10, pvalue)
data <- data.frame(id = c(bp_top$ID,cc_top$ID,mf_top$ID),
category = c(bp_top$ONTOLOGY,cc_top$ONTOLOGY,mf_top$ONTOLOGY))
rownames(data) <- data$id
data$gene_num.min <- 0
data$gene_num.rich <- c(as.numeric(unlist(lapply(bp_top$BgRatio,
function(x) strsplit(x,"/")[[1]][1]))),
as.numeric(unlist(lapply(cc_top$BgRatio,
function(x) strsplit(x,"/")[[1]][1]))),
as.numeric(unlist(lapply(mf_top$BgRatio,
function(x) strsplit(x,"/")[[1]][1]))))
data$gene_num.max <- rep(max(data$gene_num.rich),dim(data)[1])
# -log10
data$"-log10Pvalue" <- c(-log10(bp_top$pvalue),-log10(cc_top$pvalue),-log10(mf_top$pvalue))
data$rich_gene_num <- c(as.numeric(unlist(lapply(bp_top$GeneRatio,
function(x) strsplit(x,"/")[[1]][1]))),
as.numeric(unlist(lapply(cc_top$GeneRatio,
function(x) strsplit(x,"/")[[1]][1]))),
as.numeric(unlist(lapply(mf_top$GeneRatio,
function(x) strsplit(x,"/")[[1]][1]))))
data$DEGs <- c(as.numeric(unlist(lapply(bp_top$GeneRatio,
function(x) strsplit(x,"/")[[1]][2]))),
as.numeric(unlist(lapply(cc_top$GeneRatio,
function(x) strsplit(x,"/")[[1]][2]))),
as.numeric(unlist(lapply(mf_top$GeneRatio,
function(x) strsplit(x,"/")[[1]][2]))))
data$rich.factor <- data$rich_gene_num/data$DEGs
colnames(data)[c(1,2)] <- c("id", "ONTOLOGY")
data$id <- factor(data$id, levels = data$id)
pdf('GO_circlize.pdf', width = 12, height = 10)
#First circle, draw ko
circle_size = unit(1, 'snpc')
circos.par(gap.degree = 0.5, start.degree = 90)
plot_data <- data[c('id', 'gene_num.min', 'gene_num.max')]
gotype <- table(data$ONTOLOGY)
ko_color <- c(rep('#FF7F50',gotype[[1]]), rep('#87CEEB',gotype[[2]]), rep('#FFFF99',gotype[[3]]))
circos.genomicInitialize(plot_data, plotType = NULL, major.by = 1)
circos.track(
ylim = c(0, 1), track.height = 0.05, bg.border = NA, bg.col = ko_color,
panel.fun = function(x, y) {
ylim = get.cell.meta.data('ycenter')
xlim = get.cell.meta.data('xcenter')
sector.name = get.cell.meta.data('sector.index')
circos.axis(h = 'top', labels.cex = 0.4,labels.font = 2, labels.niceFacing = FALSE)
circos.text(xlim, ylim, sector.name, cex = 0.85,font = 2, niceFacing = FALSE)
} )
#In the second circle, draw enriched genes and enriched p-values
plot_data <- data[c('id', 'gene_num.min', 'gene_num.rich', '-log10Pvalue')]
label_data <- data['gene_num.rich']
p_max <- round(max(data$'-log10Pvalue')) + 1
colorsChoice <- colorRampPalette(c('#FFA500', '#FF3030'))
color_assign <- colorRamp2(breaks = 0:p_max, col = colorsChoice(p_max + 1))
circos.genomicTrackPlotRegion(
plot_data, track.height = 0.08, bg.border = NA, stack = TRUE,
panel.fun = function(region, value, ...) {
circos.genomicRect(region, value, col = color_assign(value[[1]]), border = NA, ...)
ylim = get.cell.meta.data('ycenter')
xlim = label_data[get.cell.meta.data('sector.index'),1] / 2
sector.name = label_data[get.cell.meta.data('sector.index'),1]
circos.text(xlim, ylim, sector.name, cex = 0.9,font = 2, niceFacing = FALSE)
} )
#Draw the number of enriched genes in the third circle
plot_data <- data[c('id', 'gene_num.min', 'rich_gene_num', '-log10Pvalue')]
label_data <- data['rich_gene_num']
p_max <- round(max(data$'-log10Pvalue')) + 1
colorsChoice <- colorRampPalette(c('#FFA500', '#FF3030'))
color_assign <- colorRamp2(breaks = 0:p_max, col = colorsChoice(p_max + 1))
circos.genomicTrackPlotRegion(
plot_data, track.height = 0.08, bg.border = NA, stack = TRUE,
panel.fun = function(region, value, ...) {
circos.genomicRect(region, value, col = color_assign(value[[1]]), border = NA, ...)
ylim = get.cell.meta.data('ycenter')
xlim = label_data[get.cell.meta.data('sector.index'),1] / 2
sector.name = label_data[get.cell.meta.data('sector.index'),1]
circos.text(xlim, ylim, sector.name,cex = 0.9,font = 2, niceFacing = FALSE)
} )
#Draw enrichment factors in the fourth circle
plot_data <- data[c('id', 'gene_num.min', 'gene_num.max', 'rich.factor')]
label_data <- data['ONTOLOGY']
color_assign <- c('BP' = '#FF7F50', 'CC' = '#87CEEB','MF' = '#FFFF99')
circos.genomicTrack(
plot_data, ylim = c(0, max(plot_data$rich.factor)), track.height = 0.3, bg.col = 'gray95', bg.border = NA,
panel.fun = function(region, value, ...) {
sector.name = get.cell.meta.data('sector.index')
circos.genomicRect(region, value, col = color_assign[label_data[sector.name,]], border = NA, ytop.column = 1, ybottom = 0, ...)
} )
#Add Legend
category_legend <- Legend(
labels = c('Biological Process', 'Cellular Component','Molecular Function'),
type = 'points', pch = NA, background = c('#FF7F50', '#87CEEB','#FFFF99'),
labels_gp = gpar(fontsize = 8), grid_height = unit(0.5, 'cm'), grid_width = unit(0.5, 'cm'))
# updown_legend <- Legend(
# labels = c('Up-regulated', 'Down-regulated'),
# type = 'points', pch = NA, background = c('#FFA500', '#FF3030'),
# labels_gp = gpar(fontsize = 8), grid_height = unit(0.5, 'cm'), grid_width = unit(0.5, 'cm'))
pvalue_legend <- Legend(
col_fun = colorRamp2(round(seq(0, p_max, length.out = 6), 0),
colorRampPalette(c('#FFA500', '#FF3030'))(6)),
legend_height = unit(3, 'cm'), labels_gp = gpar(fontsize = 8),
title_gp = gpar(fontsize = 9), title_position = 'topleft', title = '-Log10(Pvalue)')
lgd_list_vertical <- packLegend(pvalue_legend,category_legend,direction = 'vertical')
draw(lgd_list_vertical,x = unit(16, "cm"),)#, y = unit(12, "cm"), just = "right"
circos.clear()
dev.off()
png('GO_circlize.png', width=800, height=600, units="px")
#First circle, draw ko
circle_size = unit(1, 'snpc')
circos.par(gap.degree = 0.5, start.degree = 90)
plot_data <- data[c('id', 'gene_num.min', 'gene_num.max')]
gotype <- table(data$ONTOLOGY)
ko_color <- c(rep('#FF7F50',gotype[[1]]), rep('#87CEEB',gotype[[2]]), rep('#FFFF99',gotype[[3]]))
circos.genomicInitialize(plot_data, plotType = NULL, major.by = 1)
circos.track(
ylim = c(0, 1), track.height = 0.05, bg.border = NA, bg.col = ko_color,
panel.fun = function(x, y) {
ylim = get.cell.meta.data('ycenter')
xlim = get.cell.meta.data('xcenter')
sector.name = get.cell.meta.data('sector.index')
circos.axis(h = 'top', labels.cex = 0.4,labels.font = 2, labels.niceFacing = FALSE)
circos.text(xlim, ylim, sector.name, cex = 0.85,font = 2, niceFacing = FALSE)
} )
#In the second circle, draw enriched genes and enriched p-values
plot_data <- data[c('id', 'gene_num.min', 'gene_num.rich', '-log10Pvalue')]
label_data <- data['gene_num.rich']
p_max <- round(max(data$'-log10Pvalue')) + 1
colorsChoice <- colorRampPalette(c('#FFA500', '#FF3030'))
color_assign <- colorRamp2(breaks = 0:p_max, col = colorsChoice(p_max + 1))
circos.genomicTrackPlotRegion(
plot_data, track.height = 0.08, bg.border = NA, stack = TRUE,
panel.fun = function(region, value, ...) {
circos.genomicRect(region, value, col = color_assign(value[[1]]), border = NA, ...)
ylim = get.cell.meta.data('ycenter')
xlim = label_data[get.cell.meta.data('sector.index'),1] / 2
sector.name = label_data[get.cell.meta.data('sector.index'),1]
circos.text(xlim, ylim, sector.name, cex = 0.9,font = 2, niceFacing = FALSE)
} )
#Draw the number of enriched genes in the third circle
plot_data <- data[c('id', 'gene_num.min', 'rich_gene_num', '-log10Pvalue')]
label_data <- data['rich_gene_num']
p_max <- round(max(data$'-log10Pvalue')) + 1
colorsChoice <- colorRampPalette(c('#FFA500', '#FF3030'))
color_assign <- colorRamp2(breaks = 0:p_max, col = colorsChoice(p_max + 1))
circos.genomicTrackPlotRegion(
plot_data, track.height = 0.08, bg.border = NA, stack = TRUE,
panel.fun = function(region, value, ...) {
circos.genomicRect(region, value, col = color_assign(value[[1]]), border = NA, ...)
ylim = get.cell.meta.data('ycenter')
xlim = label_data[get.cell.meta.data('sector.index'),1] / 2
sector.name = label_data[get.cell.meta.data('sector.index'),1]
circos.text(xlim, ylim, sector.name,cex = 0.9,font = 2, niceFacing = FALSE)
} )
#Draw enrichment factors in the fourth circle
plot_data <- data[c('id', 'gene_num.min', 'gene_num.max', 'rich.factor')]
label_data <- data['ONTOLOGY']
color_assign <- c('BP' = '#FF7F50', 'CC' = '#87CEEB','MF' = '#FFFF99')
circos.genomicTrack(
plot_data, ylim = c(0, max(plot_data$rich.factor)), track.height = 0.3, bg.col = 'gray95', bg.border = NA,
panel.fun = function(region, value, ...) {
sector.name = get.cell.meta.data('sector.index')
circos.genomicRect(region, value, col = color_assign[label_data[sector.name,]], border = NA, ytop.column = 1, ybottom = 0, ...)
} )
#Add Legend
category_legend <- Legend(
labels = c('Biological Process', 'Cellular Component','Molecular Function'),
type = 'points', pch = NA, background = c('#FF7F50', '#87CEEB','#FFFF99'),
labels_gp = gpar(fontsize = 8), grid_height = unit(0.5, 'cm'), grid_width = unit(0.5, 'cm'))
# updown_legend <- Legend(
# labels = c('Up-regulated', 'Down-regulated'),
# type = 'points', pch = NA, background = c('#FFA500', '#FF3030'),
# labels_gp = gpar(fontsize = 8), grid_height = unit(0.5, 'cm'), grid_width = unit(0.5, 'cm'))
pvalue_legend <- Legend(
col_fun = colorRamp2(round(seq(0, p_max, length.out = 6), 0),
colorRampPalette(c('#FFA500', '#FF3030'))(6)),
legend_height = unit(3, 'cm'), labels_gp = gpar(fontsize = 8),
title_gp = gpar(fontsize = 9), title_position = 'topleft', title = '-Log10(Pvalue)')
lgd_list_vertical <- packLegend(pvalue_legend,category_legend,direction = 'vertical')
draw(lgd_list_vertical,x = unit(16, "cm"),)#, y = unit(12, "cm"), just = "right"
circos.clear()
dev.off()
###---- 2.KEGG pathway analysis ----
kegg_enrich <- enrichKEGG(
gene = gene_diff,
keyType = "kegg",
organism = "hsa",
pvalueCutoff = 0.2,
pAdjustMethod = "BH",
qvalueCutoff = 0.2
)
sig_kegg <- filter(kegg_enrich,pvalue<0.05)
write.csv(sig_kegg,'KEGG_enrich_results.csv',quote = F)
top5_kegg <- sig_kegg %>%
as.data.frame() %>%
arrange(pvalue) %>%
head(5)
kegg_result <- pairwise_termsim(sig_kegg)
#可视化
kegg_emap <- emapplot(kegg_result, showCategory = 10) +
theme_minimal() +
labs(title = "KEGG Pathway Enrichment Map")
pdf('kegg_emap.pdf', width = 10, height = 10)
grid.draw(kegg_emap)
dev.off()
png('kegg_emap.png', width = 800, height = 600)
grid.draw(kegg_emap)
dev.off()
library(clusterProfiler)
library(dplyr)
library(ggplot2)
library(ggridges)
library(circlize)
probe_symbols_ENid =read.csv(file = "../00_DataPre/probe.csv")
###---- 1. GO database analysis ----
hub = merge(Candidate, probe_symbols_ENid, by ="Symbol", all.x =T)
gene_up = hub[hub$change == 'up','ENTREZ_GENE_ID']
gene_down = hub[hub$change == 'down','ENTREZ_GENE_ID']
gene_diff = c(gene_up,gene_down)
ego <- enrichGO(gene=gene_diff,
OrgDb=org.Hs.eg.db,
keyType='ENTREZID',
ont="ALL",
pAdjustMethod="BH",
pvalueCutoff=0.05,
qvalueCutoff=0.2,
readable=TRUE)
head(ego)
sig_ego <- filter(ego,pvalue<0.05)
write.csv(sig_ego@result,'GO_enrich_results.csv',quote = F)
bp_top5 <- sig_ego %>%
filter(ONTOLOGY == "BP") %>%
arrange(pvalue) %>%
head(5)
cc_top5 <- sig_ego %>%
filter(ONTOLOGY == "CC") %>%
arrange(pvalue) %>%
head(5)
mf_top5 <- sig_ego %>%
filter(ONTOLOGY == "MF") %>%
arrange(pvalue) %>%
head(5)
#(3)可视化
go_result <- sig_ego@result
go_result <- go_result[order(go_result$pvalue),]
bp_top <- go_result %>% filter(ONTOLOGY == "BP") %>% top_n(-10, pvalue)
cc_top <- go_result %>% filter(ONTOLOGY == "CC") %>% top_n(-10, pvalue)
mf_top <- go_result %>% filter(ONTOLOGY == "MF") %>% top_n(-10, pvalue)
data <- data.frame(id = c(bp_top$ID,cc_top$ID,mf_top$ID),
category = c(bp_top$ONTOLOGY,cc_top$ONTOLOGY,mf_top$ONTOLOGY))
rownames(data) <- data$id
data$gene_num.min <- 0
data$gene_num.rich <- c(as.numeric(unlist(lapply(bp_top$BgRatio,
function(x) strsplit(x,"/")[[1]][1]))),
as.numeric(unlist(lapply(cc_top$BgRatio,
function(x) strsplit(x,"/")[[1]][1]))),
as.numeric(unlist(lapply(mf_top$BgRatio,
function(x) strsplit(x,"/")[[1]][1]))))
data$gene_num.max <- rep(max(data$gene_num.rich),dim(data)[1])
# -log10
data$"-log10Pvalue" <- c(-log10(bp_top$pvalue),-log10(cc_top$pvalue),-log10(mf_top$pvalue))
data$rich_gene_num <- c(as.numeric(unlist(lapply(bp_top$GeneRatio,
function(x) strsplit(x,"/")[[1]][1]))),
as.numeric(unlist(lapply(cc_top$GeneRatio,
function(x) strsplit(x,"/")[[1]][1]))),
as.numeric(unlist(lapply(mf_top$GeneRatio,
function(x) strsplit(x,"/")[[1]][1]))))
data$DEGs <- c(as.numeric(unlist(lapply(bp_top$GeneRatio,
function(x) strsplit(x,"/")[[1]][2]))),
as.numeric(unlist(lapply(cc_top$GeneRatio,
function(x) strsplit(x,"/")[[1]][2]))),
as.numeric(unlist(lapply(mf_top$GeneRatio,
function(x) strsplit(x,"/")[[1]][2]))))
data$rich.factor <- data$rich_gene_num/data$DEGs
colnames(data)[c(1,2)] <- c("id", "ONTOLOGY")
data$id <- factor(data$id, levels = data$id)
pdf('GO_circlize.pdf', width = 12, height = 10)
#First circle, draw ko
circle_size = unit(1, 'snpc')
circos.par(gap.degree = 0.5, start.degree = 90)
plot_data <- data[c('id', 'gene_num.min', 'gene_num.max')]
gotype <- table(data$ONTOLOGY)
ko_color <- c(rep('#FF7F50',gotype[[1]]), rep('#87CEEB',gotype[[2]]), rep('#FFFF99',gotype[[3]]))
circos.genomicInitialize(plot_data, plotType = NULL, major.by = 1)
circos.track(
ylim = c(0, 1), track.height = 0.05, bg.border = NA, bg.col = ko_color,
panel.fun = function(x, y) {
ylim = get.cell.meta.data('ycenter')
xlim = get.cell.meta.data('xcenter')
sector.name = get.cell.meta.data('sector.index')
circos.axis(h = 'top', labels.cex = 0.4,labels.font = 2, labels.niceFacing = FALSE)
circos.text(xlim, ylim, sector.name, cex = 0.85,font = 2, niceFacing = FALSE)
} )
#In the second circle, draw enriched genes and enriched p-values
plot_data <- data[c('id', 'gene_num.min', 'gene_num.rich', '-log10Pvalue')]
label_data <- data['gene_num.rich']
p_max <- round(max(data$'-log10Pvalue')) + 1
colorsChoice <- colorRampPalette(c('#FFA500', '#FF3030'))
color_assign <- colorRamp2(breaks = 0:p_max, col = colorsChoice(p_max + 1))
circos.genomicTrackPlotRegion(
plot_data, track.height = 0.08, bg.border = NA, stack = TRUE,
panel.fun = function(region, value, ...) {
circos.genomicRect(region, value, col = color_assign(value[[1]]), border = NA, ...)
ylim = get.cell.meta.data('ycenter')
xlim = label_data[get.cell.meta.data('sector.index'),1] / 2
sector.name = label_data[get.cell.meta.data('sector.index'),1]
circos.text(xlim, ylim, sector.name, cex = 0.9,font = 2, niceFacing = FALSE)
} )
#Draw the number of enriched genes in the third circle
plot_data <- data[c('id', 'gene_num.min', 'rich_gene_num', '-log10Pvalue')]
label_data <- data['rich_gene_num']
p_max <- round(max(data$'-log10Pvalue')) + 1
colorsChoice <- colorRampPalette(c('#FFA500', '#FF3030'))
color_assign <- colorRamp2(breaks = 0:p_max, col = colorsChoice(p_max + 1))
circos.genomicTrackPlotRegion(
plot_data, track.height = 0.08, bg.border = NA, stack = TRUE,
panel.fun = function(region, value, ...) {
circos.genomicRect(region, value, col = color_assign(value[[1]]), border = NA, ...)
ylim = get.cell.meta.data('ycenter')
xlim = label_data[get.cell.meta.data('sector.index'),1] / 2
sector.name = label_data[get.cell.meta.data('sector.index'),1]
circos.text(xlim, ylim, sector.name,cex = 0.9,font = 2, niceFacing = FALSE)
} )
#Draw enrichment factors in the fourth circle
plot_data <- data[c('id', 'gene_num.min', 'gene_num.max', 'rich.factor')]
label_data <- data['ONTOLOGY']
color_assign <- c('BP' = '#FF7F50', 'CC' = '#87CEEB','MF' = '#FFFF99')
circos.genomicTrack(
plot_data, ylim = c(0, max(plot_data$rich.factor)), track.height = 0.3, bg.col = 'gray95', bg.border = NA,
panel.fun = function(region, value, ...) {
sector.name = get.cell.meta.data('sector.index')
circos.genomicRect(region, value, col = color_assign[label_data[sector.name,]], border = NA, ytop.column = 1, ybottom = 0, ...)
} )
#Add Legend
category_legend <- Legend(
labels = c('Biological Process', 'Cellular Component','Molecular Function'),
type = 'points', pch = NA, background = c('#FF7F50', '#87CEEB','#FFFF99'),
labels_gp = gpar(fontsize = 8), grid_height = unit(0.5, 'cm'), grid_width = unit(0.5, 'cm'))
# updown_legend <- Legend(
# labels = c('Up-regulated', 'Down-regulated'),
# type = 'points', pch = NA, background = c('#FFA500', '#FF3030'),
# labels_gp = gpar(fontsize = 8), grid_height = unit(0.5, 'cm'), grid_width = unit(0.5, 'cm'))
pvalue_legend <- Legend(
col_fun = colorRamp2(round(seq(0, p_max, length.out = 6), 0),
colorRampPalette(c('#FFA500', '#FF3030'))(6)),
legend_height = unit(3, 'cm'), labels_gp = gpar(fontsize = 8),
title_gp = gpar(fontsize = 9), title_position = 'topleft', title = '-Log10(Pvalue)')
lgd_list_vertical <- packLegend(pvalue_legend,category_legend,direction = 'vertical')
draw(lgd_list_vertical,x = unit(16, "cm"),)#, y = unit(12, "cm"), just = "right"
circos.clear()
dev.off()
png('GO_circlize.png', width=800, height=600, units="px")
#First circle, draw ko
circle_size = unit(1, 'snpc')
circos.par(gap.degree = 0.5, start.degree = 90)
plot_data <- data[c('id', 'gene_num.min', 'gene_num.max')]
gotype <- table(data$ONTOLOGY)
ko_color <- c(rep('#FF7F50',gotype[[1]]), rep('#87CEEB',gotype[[2]]), rep('#FFFF99',gotype[[3]]))
circos.genomicInitialize(plot_data, plotType = NULL, major.by = 1)
circos.track(
ylim = c(0, 1), track.height = 0.05, bg.border = NA, bg.col = ko_color,
panel.fun = function(x, y) {
ylim = get.cell.meta.data('ycenter')
xlim = get.cell.meta.data('xcenter')
sector.name = get.cell.meta.data('sector.index')
circos.axis(h = 'top', labels.cex = 0.4,labels.font = 2, labels.niceFacing = FALSE)
circos.text(xlim, ylim, sector.name, cex = 0.85,font = 2, niceFacing = FALSE)
} )
#In the second circle, draw enriched genes and enriched p-values
plot_data <- data[c('id', 'gene_num.min', 'gene_num.rich', '-log10Pvalue')]
label_data <- data['gene_num.rich']
p_max <- round(max(data$'-log10Pvalue')) + 1
colorsChoice <- colorRampPalette(c('#FFA500', '#FF3030'))
color_assign <- colorRamp2(breaks = 0:p_max, col = colorsChoice(p_max + 1))
circos.genomicTrackPlotRegion(
plot_data, track.height = 0.08, bg.border = NA, stack = TRUE,
panel.fun = function(region, value, ...) {
circos.genomicRect(region, value, col = color_assign(value[[1]]), border = NA, ...)
ylim = get.cell.meta.data('ycenter')
xlim = label_data[get.cell.meta.data('sector.index'),1] / 2
sector.name = label_data[get.cell.meta.data('sector.index'),1]
circos.text(xlim, ylim, sector.name, cex = 0.9,font = 2, niceFacing = FALSE)
} )
#Draw the number of enriched genes in the third circle
plot_data <- data[c('id', 'gene_num.min', 'rich_gene_num', '-log10Pvalue')]
label_data <- data['rich_gene_num']
p_max <- round(max(data$'-log10Pvalue')) + 1
colorsChoice <- colorRampPalette(c('#FFA500', '#FF3030'))
color_assign <- colorRamp2(breaks = 0:p_max, col = colorsChoice(p_max + 1))
circos.genomicTrackPlotRegion(
plot_data, track.height = 0.08, bg.border = NA, stack = TRUE,
panel.fun = function(region, value, ...) {
circos.genomicRect(region, value, col = color_assign(value[[1]]), border = NA, ...)
ylim = get.cell.meta.data('ycenter')
xlim = label_data[get.cell.meta.data('sector.index'),1] / 2
sector.name = label_data[get.cell.meta.data('sector.index'),1]
circos.text(xlim, ylim, sector.name,cex = 0.9,font = 2, niceFacing = FALSE)
} )
#Draw enrichment factors in the fourth circle
plot_data <- data[c('id', 'gene_num.min', 'gene_num.max', 'rich.factor')]
label_data <- data['ONTOLOGY']
color_assign <- c('BP' = '#FF7F50', 'CC' = '#87CEEB','MF' = '#FFFF99')
circos.genomicTrack(
plot_data, ylim = c(0, max(plot_data$rich.factor)), track.height = 0.3, bg.col = 'gray95', bg.border = NA,
panel.fun = function(region, value, ...) {
sector.name = get.cell.meta.data('sector.index')
circos.genomicRect(region, value, col = color_assign[label_data[sector.name,]], border = NA, ytop.column = 1, ybottom = 0, ...)
} )
#Add Legend
category_legend <- Legend(
labels = c('Biological Process', 'Cellular Component','Molecular Function'),
type = 'points', pch = NA, background = c('#FF7F50', '#87CEEB','#FFFF99'),
labels_gp = gpar(fontsize = 8), grid_height = unit(0.5, 'cm'), grid_width = unit(0.5, 'cm'))
# updown_legend <- Legend(
# labels = c('Up-regulated', 'Down-regulated'),
# type = 'points', pch = NA, background = c('#FFA500', '#FF3030'),
# labels_gp = gpar(fontsize = 8), grid_height = unit(0.5, 'cm'), grid_width = unit(0.5, 'cm'))
pvalue_legend <- Legend(
col_fun = colorRamp2(round(seq(0, p_max, length.out = 6), 0),
colorRampPalette(c('#FFA500', '#FF3030'))(6)),
legend_height = unit(3, 'cm'), labels_gp = gpar(fontsize = 8),
title_gp = gpar(fontsize = 9), title_position = 'topleft', title = '-Log10(Pvalue)')
lgd_list_vertical <- packLegend(pvalue_legend,category_legend,direction = 'vertical')
draw(lgd_list_vertical,x = unit(16, "cm"),)#, y = unit(12, "cm"), just = "right"
circos.clear()
dev.off()
###---- 2.KEGG pathway analysis ----
kegg_enrich <- enrichKEGG(
gene = gene_diff,
keyType = "kegg",
organism = "hsa",
pvalueCutoff = 0.2,
pAdjustMethod = "BH",
qvalueCutoff = 0.2
)
sig_kegg <- filter(kegg_enrich,pvalue<0.05)
write.csv(sig_kegg,'KEGG_enrich_results.csv',quote = F)
top5_kegg <- sig_kegg %>%
as.data.frame() %>%
arrange(pvalue) %>%
head(5)
kegg_result <- pairwise_termsim(sig_kegg)
#可视化
kegg_emap <- emapplot(kegg_result, showCategory = 10) +
theme_minimal() +
labs(title = "KEGG Pathway Enrichment Map")
pdf('kegg_emap.pdf', width = 10, height = 10)
grid.draw(kegg_emap)
dev.off()
png('kegg_emap.png', width = 800, height = 600)
grid.draw(kegg_emap)
dev.off()
5. 结果展示
GO-Top5分析结果:
类别 | ||
---|---|---|
BP | neutrophil chemotaxis | 中性粒细胞趋化作用 |
BP | neutrophil migration | 中性粒细胞迁移 |
BP | granulocyte chemotaxis | 粒细胞趋化作用 |
BP | granulocyte migration | 粒细胞迁移 |
BP | leukocyte chemotaxis | 白细胞趋化作用 |
CC | collagen-containing extracellular matrix | 含胶原的细胞外基质 |
CC | lysosomal lumen | 溶酶体腔 |
CC | vacuolar lumen | 液泡腔 |
CC | fibrillar collagen trimer | 纤维状胶原三聚体 |
CC | banded collagen fibril | 带状胶原纤维 |
MF | immune receptor activity | 免疫受体活性 |
MF | integrin binding | 整合素结合 |
MF | extracellular matrix structural constituent | 细胞外基质结构组分 |
MF | RAGE receptor binding | RAGE受体结合 |
MF | IgG binding | IgG结合 |
可视化:
GO_circlize.png
KEGG top5-分析结果:
KEGG | |
---|---|
IL-17 signaling pathway | IL-17信号通路 |
Platelet activation | 血小板活化 |
Tuberculosis | 结核病 |
Cytoskeleton in muscle cells | 肌细胞骨架 |
Asthma | 哮喘 |
可视化:
kegg_emap.png