2025-08-25 2025-08-23 小马的acRIP call完peak用limma分析

DESeq2分析出来的结果很烂,换了limma先用IP/INPUT在做组间比较

  library(tidyverse)

  library(limma)

  library(ChIPseeker)

  library(GenomicRanges)

  library(TxDb.Mmusculus.UCSC.mm10.knownGene)  # 小鼠 mm10

  library(org.Mm.eg.db)

  library(ggplot2)

  library(ggrepel)

# ========= 0) 输入文件 =========

infile <- "all_counts.txt"  # 列:Sham2_IP, Sham3_IP, Sham4_IP, SNI1_IP, SNI2_IP, SNI4_IP, Sham_Input, SNI_Input

# ========= 1) 读入数据 =========

counts <- read.table(infile, header = TRUE, row.names = 1, sep = "\t", check.names = FALSE)

counts[is.na(counts)] <- 0

# 必要列检查

ip_cols  <- c("Sham2_IP","Sham3_IP","Sham4_IP","SNI1_IP","SNI2_IP","SNI4_IP")

input_cols <- c("Sham_Input","SNI_Input")

need_cols <- c(ip_cols, input_cols)

missing <- setdiff(need_cols, colnames(counts))

if (length(missing) > 0) stop("下列列名在 all_counts.txt 中缺失: ", paste(missing, collapse=", "))

# ========= 2) 计算 log2(IP/Input) enrichment =========

ps <- 1  # 伪计数,避免除零

enrich_mat <- sapply(ip_cols, function(ipc){

  denom <- if (grepl("^Sham", ipc)) "Sham_Input" else "SNI_Input"

  log2((counts[[ipc]] + ps) / (counts[[denom]] + ps))

})

enrich_mat <- as.data.frame(enrich_mat)

rownames(enrich_mat) <- rownames(counts)

colnames(enrich_mat) <- ip_cols

# 保存 enrichment 矩阵(可复现)

write.table(enrich_mat, file = "log2_enrichment_matrix.txt", sep = "\t", quote = FALSE, col.names = NA)

# ========= 3) 组信息与设计矩阵 =========

group <- ifelse(grepl("^Sham", colnames(enrich_mat)), "Sham", "SNI")

group <- factor(group, levels = c("Sham","SNI"))

design <- model.matrix(~0 + group)

colnames(design) <- levels(group)

# ========= 4) limma 差异分析(在 log2 enrichment 上拟合)=========

fit  <- lmFit(enrich_mat, design)

cont <- makeContrasts(SNIvsSham = SNI - Sham, levels = design)

fit2 <- contrasts.fit(fit, cont)

fit2 <- eBayes(fit2, trend = TRUE, robust = TRUE)

res <- topTable(fit2, coef = "SNIvsSham", number = Inf, sort.by = "none")

res <- res %>% tibble::rownames_to_column("peak_coord")  # 保留峰坐标

# res 列典型包含:peak_coord, logFC, AveExpr, t, P.Value, adj.P.Val, B

# ========= 5) 峰注释(ChIPseeker, mm10)=========

# 将 peak_coord 拆成 GRanges

split_peak <- strsplit(res$peak_coord, "_")

peak_df <- tibble(

  peak_coord = res$peak_coord,

  chr  = vapply(split_peak, `[`, character(1L), 1),

  start = as.numeric(vapply(split_peak, `[`, character(1L), 2)),

  end  = as.numeric(vapply(split_peak, `[`, character(1L), 3))

)

# 过滤掉解析失败的行(极少见)

peak_df <- peak_df %>% filter(!is.na(start), !is.na(end))

gr <- makeGRangesFromDataFrame(

  peak_df,

  keep.extra.columns = TRUE,

  seqnames.field = "chr",

  start.field    = "start",

  end.field      = "end"

)

txdb <- TxDb.Mmusculus.UCSC.mm10.knownGene

anno <- annotatePeak(gr, TxDb = txdb, tssRegion = c(-3000, 3000), annoDb = "org.Mm.eg.db")

annotated_peaks <- as.data.frame(anno) %>%

  # 重新拼回与 peak_coord 相同的键,保证可匹配

  mutate(peak_coord = paste(seqnames, start, end, sep = "_")) %>%

  dplyr::select(

    peak_coord, SYMBOL, geneId, annotation, distanceToTSS

  )

# ========= 6) 合并差异结果 + 注释,并输出 =========

res_annotated <- res %>%

  left_join(annotated_peaks, by = "peak_coord")

# 保存“所有峰 + 注释 + 差异统计”

write.csv(res_annotated, "limma_results_enrichment_annotated.csv", row.names = FALSE)

# ========= 7) 火山图(FDR<0.05 标红,并标注 SYMBOL)=========

res_annotated <- res_annotated %>%

  mutate(Significant = !is.na(adj.P.Val) & adj.P.Val < 0.05)

# 为避免标签过多,这里只标注显著性最强的前 N 个(可调整)

N_label <- 50

label_df <- res_annotated %>%

  filter(Significant) %>%

  arrange(adj.P.Val) %>%

  mutate(label = ifelse(!is.na(SYMBOL) & SYMBOL != "", SYMBOL, NA_character_)) %>%

  slice_head(n = N_label)

p <- ggplot(res_annotated, aes(x = logFC, y = -log10(adj.P.Val), color = Significant)) +

  geom_point(alpha = 0.7, size = 1.6) +

  scale_color_manual(values = c(`TRUE` = "red", `FALSE` = "grey70")) +

  geom_hline(yintercept = -log10(0.05), linetype = "dashed") +

  labs(

    title = "Volcano plot (log2(IP/Input), SNI vs Sham)",

    x = "log2 Fold Change (SNI - Sham) on log2(IP/Input)",

    y = "-log10(FDR)",

    color = "FDR < 0.05"

  ) +

  theme_bw(base_size = 13)

# 只给显著里最强的 N 个加标签

if (nrow(label_df) > 0) {

  p <- p + ggrepel::geom_text_repel(

    data = label_df,

    aes(label = label),

    size = 3,

    max.overlaps = Inf,

    min.segment.length = 0,

    box.padding = 0.3,

    point.padding = 0.2

  )

}

ggsave("volcano_enrichment_annot.png", p, width = 7, height = 5.5, dpi = 300)

message("完成:

- log2_enrichment_matrix.txt

- limma_results_enrichment_annotated.csv

- volcano_enrichment_annot.png")

©著作权归作者所有,转载或内容合作请联系作者
【社区内容提示】社区部分内容疑似由AI辅助生成,浏览时请结合常识与多方信息审慎甄别。
平台声明:文章内容(如有图片或视频亦包括在内)由作者上传并发布,文章内容仅代表作者本人观点,简书系信息发布平台,仅提供信息存储服务。

相关阅读更多精彩内容

友情链接更多精彩内容