DESeq2分析出来的结果很烂,换了limma先用IP/INPUT在做组间比较
library(tidyverse)
library(limma)
library(ChIPseeker)
library(GenomicRanges)
library(TxDb.Mmusculus.UCSC.mm10.knownGene) # 小鼠 mm10
library(org.Mm.eg.db)
library(ggplot2)
library(ggrepel)
# ========= 0) 输入文件 =========
infile <- "all_counts.txt" # 列:Sham2_IP, Sham3_IP, Sham4_IP, SNI1_IP, SNI2_IP, SNI4_IP, Sham_Input, SNI_Input
# ========= 1) 读入数据 =========
counts <- read.table(infile, header = TRUE, row.names = 1, sep = "\t", check.names = FALSE)
counts[is.na(counts)] <- 0
# 必要列检查
ip_cols <- c("Sham2_IP","Sham3_IP","Sham4_IP","SNI1_IP","SNI2_IP","SNI4_IP")
input_cols <- c("Sham_Input","SNI_Input")
need_cols <- c(ip_cols, input_cols)
missing <- setdiff(need_cols, colnames(counts))
if (length(missing) > 0) stop("下列列名在 all_counts.txt 中缺失: ", paste(missing, collapse=", "))
# ========= 2) 计算 log2(IP/Input) enrichment =========
ps <- 1 # 伪计数,避免除零
enrich_mat <- sapply(ip_cols, function(ipc){
denom <- if (grepl("^Sham", ipc)) "Sham_Input" else "SNI_Input"
log2((counts[[ipc]] + ps) / (counts[[denom]] + ps))
})
enrich_mat <- as.data.frame(enrich_mat)
rownames(enrich_mat) <- rownames(counts)
colnames(enrich_mat) <- ip_cols
# 保存 enrichment 矩阵(可复现)
write.table(enrich_mat, file = "log2_enrichment_matrix.txt", sep = "\t", quote = FALSE, col.names = NA)
# ========= 3) 组信息与设计矩阵 =========
group <- ifelse(grepl("^Sham", colnames(enrich_mat)), "Sham", "SNI")
group <- factor(group, levels = c("Sham","SNI"))
design <- model.matrix(~0 + group)
colnames(design) <- levels(group)
# ========= 4) limma 差异分析(在 log2 enrichment 上拟合)=========
fit <- lmFit(enrich_mat, design)
cont <- makeContrasts(SNIvsSham = SNI - Sham, levels = design)
fit2 <- contrasts.fit(fit, cont)
fit2 <- eBayes(fit2, trend = TRUE, robust = TRUE)
res <- topTable(fit2, coef = "SNIvsSham", number = Inf, sort.by = "none")
res <- res %>% tibble::rownames_to_column("peak_coord") # 保留峰坐标
# res 列典型包含:peak_coord, logFC, AveExpr, t, P.Value, adj.P.Val, B
# ========= 5) 峰注释(ChIPseeker, mm10)=========
# 将 peak_coord 拆成 GRanges
split_peak <- strsplit(res$peak_coord, "_")
peak_df <- tibble(
peak_coord = res$peak_coord,
chr = vapply(split_peak, `[`, character(1L), 1),
start = as.numeric(vapply(split_peak, `[`, character(1L), 2)),
end = as.numeric(vapply(split_peak, `[`, character(1L), 3))
)
# 过滤掉解析失败的行(极少见)
peak_df <- peak_df %>% filter(!is.na(start), !is.na(end))
gr <- makeGRangesFromDataFrame(
peak_df,
keep.extra.columns = TRUE,
seqnames.field = "chr",
start.field = "start",
end.field = "end"
)
txdb <- TxDb.Mmusculus.UCSC.mm10.knownGene
anno <- annotatePeak(gr, TxDb = txdb, tssRegion = c(-3000, 3000), annoDb = "org.Mm.eg.db")
annotated_peaks <- as.data.frame(anno) %>%
# 重新拼回与 peak_coord 相同的键,保证可匹配
mutate(peak_coord = paste(seqnames, start, end, sep = "_")) %>%
dplyr::select(
peak_coord, SYMBOL, geneId, annotation, distanceToTSS
)
# ========= 6) 合并差异结果 + 注释,并输出 =========
res_annotated <- res %>%
left_join(annotated_peaks, by = "peak_coord")
# 保存“所有峰 + 注释 + 差异统计”
write.csv(res_annotated, "limma_results_enrichment_annotated.csv", row.names = FALSE)
# ========= 7) 火山图(FDR<0.05 标红,并标注 SYMBOL)=========
res_annotated <- res_annotated %>%
mutate(Significant = !is.na(adj.P.Val) & adj.P.Val < 0.05)
# 为避免标签过多,这里只标注显著性最强的前 N 个(可调整)
N_label <- 50
label_df <- res_annotated %>%
filter(Significant) %>%
arrange(adj.P.Val) %>%
mutate(label = ifelse(!is.na(SYMBOL) & SYMBOL != "", SYMBOL, NA_character_)) %>%
slice_head(n = N_label)
p <- ggplot(res_annotated, aes(x = logFC, y = -log10(adj.P.Val), color = Significant)) +
geom_point(alpha = 0.7, size = 1.6) +
scale_color_manual(values = c(`TRUE` = "red", `FALSE` = "grey70")) +
geom_hline(yintercept = -log10(0.05), linetype = "dashed") +
labs(
title = "Volcano plot (log2(IP/Input), SNI vs Sham)",
x = "log2 Fold Change (SNI - Sham) on log2(IP/Input)",
y = "-log10(FDR)",
color = "FDR < 0.05"
) +
theme_bw(base_size = 13)
# 只给显著里最强的 N 个加标签
if (nrow(label_df) > 0) {
p <- p + ggrepel::geom_text_repel(
data = label_df,
aes(label = label),
size = 3,
max.overlaps = Inf,
min.segment.length = 0,
box.padding = 0.3,
point.padding = 0.2
)
}
ggsave("volcano_enrichment_annot.png", p, width = 7, height = 5.5, dpi = 300)
message("完成:
- log2_enrichment_matrix.txt
- limma_results_enrichment_annotated.csv
- volcano_enrichment_annot.png")