所有bug暂且不表,只记录有用的
环境&包,因为是新电脑,全是新装的
if (!require("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install(c(
"GenomicRanges", # 用于处理基因组范围数据
"rtracklayer", # 用于导入/导出BED、GTF等文件
"TxDb.Mmusculus.UCSC.mm10.knownGene", # mm10基因组注释
"Guitar" # 用于绘制metagene plot
))
install.packages(c(
"tidyverse", # 数据整理和可视化
"ggplot2", # 高级绘图
"data.table", # 高效数据处理
"devtools" # 用于从GitHub安装包
))
library(GenomicRanges)
library(rtracklayer)
library(TxDb.Mmusculus.UCSC.mm10.knownGene)
library(Guitar)
library(tidyverse)
SNI的,主要bug是mm10.fa的染色质序号没有chr前缀,
BED文件中的线粒体染色体名称为 MT,而 TxDb 中使用 M,需要手动修正
以及存在非标准染色体
报错信息大概是 1: In .merge_two_Seqinfo_objects(x, y) : Each of the 2 combined objects has sequence levels not in the other: - in 'x': chrGL456233.2, chrJH584304.1, chrMT, chrMU069435.1
# 1. 读取BED文件
bed_path_sni <- "F:/XZHMU/huangyue/motif/RIP_SNI_peaks_summits.bed"
bed_sni <- import(bed_path_sni, format = "bed") # 明确指定文件格式为bed
# 2. 修正染色体名称(添加chr前缀)
bed_ucsc_sni <- GRanges(
seqnames = paste0("chr", seqnames(bed_sni)),
ranges = ranges(bed_sni),
strand = strand(bed_sni),
mcols = mcols(bed_sni)
)
# 3. 统一线粒体染色体名称(chrMT -> chrM)
seqlevels(bed_ucsc_sni)[seqlevels(bed_ucsc_sni) == "chrMT"] <- "chrM"
# 4. 过滤非标准染色体(仅保留chr1-chr19, chrX, chrY, chrM)
valid_chr <- seqlevels(TxDb.Mmusculus.UCSC.mm10.knownGene)
valid_chr <- valid_chr[grep("^chr[0-9XYM]+$", valid_chr)]
bed_filtered_sni <- bed_ucsc_sni[seqnames(bed_ucsc_sni) %in% valid_chr]
# 5. 导出为新的BED文件
export(bed_filtered_sni, "F:/XZHMU/huangyue/motif/RIP_SNI_peaks_summits_UCSC_final.bed")
# 6. 运行GuitarPlot
p_sni <- GuitarPlot(
txTxdb = TxDb.Mmusculus.UCSC.mm10.knownGene,
stBedFiles = "F:/XZHMU/huangyue/motif/RIP_SNI_peaks_summits_UCSC_final.bed",
headOrtail = FALSE,
enableCI = FALSE,
mapFilterTranscript = TRUE,
pltTxType = "mrna",
stGroupName = "SNI"
)
# 7. 显示结果
print(p_sni)
SH
# 1. 读取BED文件bed_path_sh <- "F:/XZHMU/huangyue/motif/RIP_sh_peaks_summits.bed"
bed_sh <- import(bed_path_sh, format = "bed") # 明确指定文件格式为bed
# 2. 修正染色体名称(添加chr前缀)
bed_ucsc_sh <- GRanges(
seqnames = paste0("chr", seqnames(bed_sh)),
ranges = ranges(bed_sh),
strand = strand(bed_sh),
mcols = mcols(bed_sh)
)
# 3. 统一线粒体染色体名称(chrMT -> chrM)
seqlevels(bed_ucsc_sh)[seqlevels(bed_ucsc_sh) == "chrMT"] <- "chrM"
# 4. 过滤非标准染色体(仅保留chr1-chr19, chrX, chrY, chrM)
valid_chr <- seqlevels(TxDb.Mmusculus.UCSC.mm10.knownGene)
valid_chr <- valid_chr[grep("^chr[0-9XYM]+$", valid_chr)]
bed_filtered_sh <- bed_ucsc_sh[seqnames(bed_ucsc_sh) %in% valid_chr]
# 5. 导出为新的BED文件
export(bed_filtered_sh, "F:/XZHMU/huangyue/motif/RIP_sh_peaks_summits_UCSC_final.bed")
# 6. 运行GuitarPlot
p_sh <- GuitarPlot(
txTxdb = TxDb.Mmusculus.UCSC.mm10.knownGene,
stBedFiles = "F:/XZHMU/huangyue/motif/RIP_sh_peaks_summits_UCSC_final.bed", # 去掉多余的空格
headOrtail = FALSE,
enableCI = FALSE,
mapFilterTranscript = TRUE,
pltTxType = "mrna",
stGroupName = "sh"
)
# 7. 显示结果
print(p_sh)
SHAM
# 1. 读取BED文件
bed_path_sham <- "F:/XZHMU/huangyue/motif/RIP_Sham_peaks_summits.bed"
bed_sham <- import(bed_path_sham)
# 2. 修正染色体名称(添加chr前缀)
bed_ucsc_sham <- GRanges(
seqnames = paste0("chr", seqnames(bed_sham)),
ranges = ranges(bed_sham),
strand = strand(bed_sham),
mcols = mcols(bed_sham)
)
# 3. 统一线粒体染色体名称(chrMT -> chrM)
seqlevels(bed_ucsc_sham)[seqlevels(bed_ucsc_sham) == "chrMT"] <- "chrM"
# 4. 过滤非标准染色体(仅保留chr1-chr19, chrX, chrY, chrM)
valid_chr <- seqlevels(TxDb.Mmusculus.UCSC.mm10.knownGene)
valid_chr <- valid_chr[grep("^chr[0-9XYM]+$", valid_chr)]
bed_filtered_sham <- bed_ucsc_sham[seqnames(bed_ucsc_sham) %in% valid_chr]
# 5. 导出为新的BED文件
export(bed_filtered_sham, "F:/XZHMU/huangyue/motif/RIP_sham_peaks_summits_UCSC_final.bed")
# 6. 运行GuitarPlot
p_sham <- GuitarPlot(
txTxdb = TxDb.Mmusculus.UCSC.mm10.knownGene,
stBedFiles = "F:/XZHMU/huangyue/motif/RIP_sham_peaks_summits_UCSC_final.bed", # 传入文件路径
headOrtail = FALSE,
enableCI = FALSE,
mapFilterTranscript = TRUE,
pltTxType = "mrna",
stGroupName = "sham"
)
# 7. 显示结果
print(p_sham)
合并两个图
# 读取两组数据
bed_path_sham <- "F:/XZHMU/huangyue/motif/RIP_sham_peaks_summits_UCSC_final.bed"
bed_path_sni <- "F:/XZHMU/huangyue/motif/RIP_SNI_peaks_summits_UCSC_final.bed"
bed_sham <- import(bed_path_sham, format = "bed")
bed_sni <- import(bed_path_sni, format = "bed")
# 添加分组信息
mcols(bed_sham)$group <- "sham"
mcols(bed_sni)$group <- "SNI"
# 合并两组数据
combined_bed <- c(bed_sham, bed_sni)
# 读取转录组数据库
txdb_file <- system.file("extdata", "mm10_toy.sqlite", package = "Guitar")
txdb <- loadDb(txdb_file)
# 运行 GuitarPlot
p_combined <- GuitarPlot(
txTxdb = txdb,
stBedFiles = list(bed_sham, bed_sni), # 使用列表传递两组数据
stGroupName = c("sham", "SNI"), # 分组名称
colors = c("blue", "red"), # 为不同分组指定颜色
pltTxType = "mrna",
headOrtail = FALSE,
enableCI = FALSE,
mapFilterTranscript = TRUE
)
# 显示结果
print(p_combined)