2025-08-25 2025-08-23 小马的acRIP call完peak用limma分析

DESeq2分析出来的结果很烂，换了limma先用IP/INPUT在做组间比较

library(tidyverse)

library(limma)

library(ChIPseeker)

library(GenomicRanges)

library(TxDb.Mmusculus.UCSC.mm10.knownGene) # 小鼠 mm10

library(org.Mm.eg.db)

library(ggplot2)

library(ggrepel)

# ========= 0) 输入文件 =========

infile <- "all_counts.txt" # 列：Sham2_IP, Sham3_IP, Sham4_IP, SNI1_IP, SNI2_IP, SNI4_IP, Sham_Input, SNI_Input

# ========= 1) 读入数据 =========

counts <- read.table(infile, header = TRUE, row.names = 1, sep = "\t", check.names = FALSE)

counts[is.na(counts)] <- 0

# 必要列检查

ip_cols <- c("Sham2_IP","Sham3_IP","Sham4_IP","SNI1_IP","SNI2_IP","SNI4_IP")

input_cols <- c("Sham_Input","SNI_Input")

need_cols <- c(ip_cols, input_cols)

missing <- setdiff(need_cols, colnames(counts))

if (length(missing) > 0) stop("下列列名在 all_counts.txt 中缺失: ", paste(missing, collapse=", "))

# ========= 2) 计算 log2(IP/Input) enrichment =========

ps <- 1 # 伪计数，避免除零

enrich_mat <- sapply(ip_cols, function(ipc){

denom <- if (grepl("^Sham", ipc)) "Sham_Input" else "SNI_Input"

log2((counts[[ipc]] + ps) / (counts[[denom]] + ps))

})

enrich_mat <- as.data.frame(enrich_mat)

rownames(enrich_mat) <- rownames(counts)

colnames(enrich_mat) <- ip_cols

# 保存 enrichment 矩阵（可复现）

write.table(enrich_mat, file = "log2_enrichment_matrix.txt", sep = "\t", quote = FALSE, col.names = NA)

# ========= 3) 组信息与设计矩阵 =========

group <- ifelse(grepl("^Sham", colnames(enrich_mat)), "Sham", "SNI")

group <- factor(group, levels = c("Sham","SNI"))

design <- model.matrix(~0 + group)

colnames(design) <- levels(group)

# ========= 4) limma 差异分析（在 log2 enrichment 上拟合）=========

fit <- lmFit(enrich_mat, design)

cont <- makeContrasts(SNIvsSham = SNI - Sham, levels = design)

fit2 <- contrasts.fit(fit, cont)

fit2 <- eBayes(fit2, trend = TRUE, robust = TRUE)

res <- topTable(fit2, coef = "SNIvsSham", number = Inf, sort.by = "none")

res <- res %>% tibble::rownames_to_column("peak_coord") # 保留峰坐标

# res 列典型包含：peak_coord, logFC, AveExpr, t, P.Value, adj.P.Val, B

# ========= 5) 峰注释（ChIPseeker, mm10）=========

# 将 peak_coord 拆成 GRanges

split_peak <- strsplit(res$peak_coord, "_")

peak_df <- tibble(

peak_coord = res$peak_coord,

chr = vapply(split_peak, `[`, character(1L), 1),

start = as.numeric(vapply(split_peak, `[`, character(1L), 2)),

end = as.numeric(vapply(split_peak, `[`, character(1L), 3))

)

# 过滤掉解析失败的行（极少见）

peak_df <- peak_df %>% filter(!is.na(start), !is.na(end))

gr <- makeGRangesFromDataFrame(

peak_df,

keep.extra.columns = TRUE,

seqnames.field = "chr",

start.field = "start",

end.field = "end"

)

txdb <- TxDb.Mmusculus.UCSC.mm10.knownGene

anno <- annotatePeak(gr, TxDb = txdb, tssRegion = c(-3000, 3000), annoDb = "org.Mm.eg.db")

annotated_peaks <- as.data.frame(anno) %>%

# 重新拼回与 peak_coord 相同的键，保证可匹配

mutate(peak_coord = paste(seqnames, start, end, sep = "_")) %>%

dplyr::select(

peak_coord, SYMBOL, geneId, annotation, distanceToTSS

)

# ========= 6) 合并差异结果 + 注释，并输出 =========

res_annotated <- res %>%

left_join(annotated_peaks, by = "peak_coord")

# 保存“所有峰 + 注释 + 差异统计”

write.csv(res_annotated, "limma_results_enrichment_annotated.csv", row.names = FALSE)

# ========= 7) 火山图（FDR<0.05 标红，并标注 SYMBOL）=========

res_annotated <- res_annotated %>%

mutate(Significant = !is.na(adj.P.Val) & adj.P.Val < 0.05)

# 为避免标签过多，这里只标注显著性最强的前 N 个（可调整）

N_label <- 50

label_df <- res_annotated %>%

filter(Significant) %>%

arrange(adj.P.Val) %>%

mutate(label = ifelse(!is.na(SYMBOL) & SYMBOL != "", SYMBOL, NA_character_)) %>%

slice_head(n = N_label)

p <- ggplot(res_annotated, aes(x = logFC, y = -log10(adj.P.Val), color = Significant)) +

geom_point(alpha = 0.7, size = 1.6) +

scale_color_manual(values = c(`TRUE` = "red", `FALSE` = "grey70")) +

geom_hline(yintercept = -log10(0.05), linetype = "dashed") +

labs(

title = "Volcano plot (log2(IP/Input), SNI vs Sham)",

x = "log2 Fold Change (SNI - Sham) on log2(IP/Input)",

y = "-log10(FDR)",

color = "FDR < 0.05"

) +

theme_bw(base_size = 13)

# 只给显著里最强的 N 个加标签

if (nrow(label_df) > 0) {

p <- p + ggrepel::geom_text_repel(

data = label_df,

aes(label = label),

size = 3,

max.overlaps = Inf,

min.segment.length = 0,

box.padding = 0.3,

point.padding = 0.2

)

}

ggsave("volcano_enrichment_annot.png", p, width = 7, height = 5.5, dpi = 300)

message("完成：

- log2_enrichment_matrix.txt

- limma_results_enrichment_annotated.csv

- volcano_enrichment_annot.png")

2025-08-25 2025-08-23 小马的acRIP call完peak用limma分析

2025-08-25 2025-08-23 小马的acRIP call完peak用limma分析

相关阅读更多精彩内容

友情链接更多精彩内容