今天无意间看到了一个图被惊艳到了,随后自己写代码,试了一下效果。做出来了,但有点细微区别,还请高人指点。
777639203090330380.jpg
R CODE如下
rm(list = ls())
options(stringsAsFactors = F)
# 加载必要包
library(clusterProfiler)
library(org.Hs.eg.db)
library(ggplot2)
library(dplyr)
library(enrichplot)
# 读取差异表达数据
de_data <- read.csv("mRNADE.csv")
# 只保留显著差异的基因
sig_de_data <- de_data %>%
filter(!change %in% c("Stable", "Not significant"))
# 基因ID转换
symbol_mapping <- bitr(sig_de_data$Name,
fromType = "SYMBOL",
toType = "ENTREZID",
OrgDb = org.Hs.eg.db)
# 分组基因列表 - 只包含上调和下调基因
gene_list <- split(
symbol_mapping$ENTREZID,
sig_de_data$change[match(symbol_mapping$SYMBOL, sig_de_data$Name)]
)
# 并行KEGG富集分析
kegg_results <- compareCluster(
geneClusters = gene_list,
fun = "enrichKEGG",
organism = "hsa", # 人类KEGG代码
pAdjustMethod = "BH",
pvalueCutoff = 0.05,
qvalueCutoff = 0.2
)
# 确定上调和下调的列名
up_pattern <- "up|Up|UP|upregulated|Upregulated|UPREGULATED"
down_pattern <- "down|Down|DOWN|downregulated|Downregulated|DOWNREGULATED"
# 将结果转换为数据框
kegg_df <- as.data.frame(kegg_results)
# 分别为每个组别选择前10个通路
kegg_up <- kegg_df %>%
filter(grepl(up_pattern, Cluster)) %>%
top_n(10, -p.adjust)
kegg_down <- kegg_df %>%
filter(grepl(down_pattern, Cluster)) %>%
top_n(10, -p.adjust)
# 合并上调和下调的结果
kegg_top20 <- rbind(kegg_up, kegg_down)
# 计算RichFactor (GeneRatio的数值表示)
kegg_top20 <- kegg_top20 %>%
mutate(
GeneRatio_num = sapply(strsplit(GeneRatio, "/"), function(x) as.numeric(x[1])/as.numeric(x[2])),
# 根据实际的Cluster值判断上下调
RichFactor = case_when(
grepl(up_pattern, Cluster) ~ GeneRatio_num,
grepl(down_pattern, Cluster) ~ -GeneRatio_num,
TRUE ~ GeneRatio_num # 默认情况
)
)
# 高级可视化 - KEGG富集图
custom_plot <- ggplot(kegg_top20,
aes(x = RichFactor,
y = reorder(Description, abs(RichFactor)),
size = Count,
color = -log10(p.adjust))) +
geom_point(alpha = 0.8) +
geom_vline(xintercept = 0, linetype = "dashed", color = "grey50") +
scale_color_gradient(low = "#E6E6FA", high = "#4B0082",
name = "-log10(p.adj)") +
scale_size_continuous(range = c(3, 8),
name = "Gene count") +
# 添加分类颜色线条
geom_segment(aes(x = 0, xend = RichFactor,
y = reorder(Description, abs(RichFactor)),
yend = reorder(Description, abs(RichFactor)),
color = NULL),
size = 0.5, alpha = 0.6) +
# 添加x轴标签
scale_x_continuous(
labels = function(x) abs(x),
breaks = pretty(kegg_top20$RichFactor)
) +
labs(x = "RichFactor",
y = "KEGG Pathways",
title = "Top 10 KEGG Pathways for Up/Down Regulated Genes") +
theme_minimal(base_size = 12) +
theme(
panel.grid.major = element_line(color = "grey90"),
axis.text.y = element_text(face = "bold", color = "black"),
legend.position = "right",
plot.title = element_text(hjust = 0.5, face = "bold")
) +
# 添加下方的标签
annotate("text", x = -max(abs(kegg_top20$RichFactor))/2, y = -1, label = "Down", size = 4) +
annotate("text", x = max(abs(kegg_top20$RichFactor))/2, y = -1, label = "Up", size = 4)
custom_plot
# 输出高清图
ggsave("KEGG_analysis_richfactor.tiff",
plot = custom_plot,
device = "tiff",
dpi = 600,
width = 14,
height = 10,
compression = "lzw")