TCseq包可以应用于转录组,单细胞转录组,ATAC-seq,Chip-seq等的表观基因组时序型数据分析。该包主要用于不同时间点的差异分析,时间趋势分析及可视化作图。
官网:http://bioconductor.org/packages/release/bioc/html/TCseq.html
## 下载
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install("TCseq")
## 查看网页版说明
browseVignettes("TCseq")
1. 数据准备,构建TCA对象
data("genomicIntervals") #基因的位置信息文件(染色体,起始及终止位点,基因id四列)
head(genomicIntervals)
# chr start end id
# 1 chr1 6453992 6454731 peak1
# 2 chr1 7823890 7824372 peak2
# 3 chr1 8029820 8030138 peak3
# 4 chr1 8030317 8030627 peak4
# 5 chr1 10880000 10880701 peak5
# 6 chr1 13154468 13154786 peak6
data("experiment_BAMfile")
head(experiment_BAMfile)
# sampleid timepoint group bamfile
# 1 s1 0h 1 rep1_0h.bam
# 2 s2 24h 2 rep1_24h.bam
# 3 s3 40h 3 rep1_40h.bam
# 4 s4 56h 4 rep1_56h.bam
# 5 s5 72h 5 rep1_72h.bam
# 6 s6 120h 6 rep1_120h.bam
tca <- TCA(design = experiment_BAMfile, genomicFeature = genomicIntervals)
tca
data("experiment") #分组文件(包含样品,时间点,分组信息)
head(experiment)
# sampleid timepoint group
# 1 s1 0h 1
# 2 s2 24h 2
# 3 s3 40h 3
# 4 s4 56h 4
# 5 s5 72h 5
# 6 s6 120h 6
data("countsTable")#基因表达矩阵文件(可以直接拿原始数据)
head(countsTable)
# s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11 s12
# peak1 344 243 169 70 57 20 298 199 135 63 54 34
# peak2 72 114 91 93 133 164 55 71 93 116 150 191
# peak3 28 50 109 115 94 109 60 89 75 129 85 101
# peak4 28 49 110 113 103 108 59 89 77 131 83 104
# peak5 464 344 280 154 108 33 444 255 259 155 97 32
# peak6 175 129 77 29 15 4 134 91 44 33 11 5
tca <- TCA(design = experiment, genomicFeature = genomicIntervals,counts = countsTable)
tca
counts(tca) <- countsTable
suppressWarnings(library(SummarizedExperiment))
se <- SummarizedExperiment(assays=list(counts = countsTable), colData = experiment)
tca <- TCAFromSummarizedExperiment(se = se, genomicFeature = genomicIntervals)
2. 差异分析
该包内置了我们常用的差异分析edgeR包,通过使用广义线性模型(GLM)方法去鉴定差异基因。
tca <- DBanalysis(tca)
tca <- DBanalysis(tca, filter.type = "raw", filter.value = 10, samplePassfilter = 2)
DBres <- DBresult(tca, group1 = "0h", group2 = c("24h","40h","72h"))
str(DBres, strict.width = "cut")
head(DBres$`24hvs0h`)
DBres.sig <- DBresult(tca, group1 = "0h", group2 = c("24h","40h","72h"), top.sig = TRUE)
str(DBres.sig, strict.width = "cut")
3. 时间趋势分析
tca <- timecourseTable(tca, value = "FC", norm.method = "rpkm", filter = TRUE)
tca <- timecourseTable(tca, value = "expression", norm.method = "rpkm", filter = TRUE)
t <- tcTable(tca)
head(t)
tca <- timeclust(tca, algo = "cm", k = 6, standardize = TRUE)
p <- timeclustplot(tca, value = "z-score(PRKM)", cols = 3)
print(p[[1]])