做chip-seq时候,使用chipseeker对找到的peak进行注释
需要加载相应的txdb文件
参考网站:
http://www.360doc.com/content/18/0402/01/33459258_742149876.shtml
https://mp.weixin.qq.com/s/_OPzvaEAbiMolCA2mqJXLw
方法1
使用gtf构建
library(GenomicFeatrues)
spombe <- makeTxDbFromGFF("test.gff3")
#从ucsc中下载,但是我没跑通
hg19.refseq.db <- makeTxDbFromUCSC(genome="hg19", table="refGene")
方法2
从Bioconductor中直接下载,Bioconductor提供了30个TxDb包
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
BiocManager::install("TxDb.Hsapiens.UCSC.hg38.knownGene")
library("TxDb.Hsapiens.UCSC.hg38.knownGene")
> hg38.refseq.db = TxDb.Hsapiens.UCSC.hg38.knownGene
> hg38.refseq.db
TxDb object:
# Db type: TxDb
# Supporting package: GenomicFeatures
# Data source: UCSC
# Genome: hg38
# Organism: Homo sapiens
# Taxonomy ID: 9606
# UCSC Table: knownGene
# UCSC Track: GENCODE V36
# Resource URL: http://genome.ucsc.edu/
# Type of Gene ID: Entrez Gene ID
# Full dataset: yes
# miRBase build ID: NA
# Nb of transcripts: 232184
# Db created by: GenomicFeatures package from Bioconductor
# Creation time: 2021-04-28 16:32:07 +0000 (Wed, 28 Apr 2021)
# GenomicFeatures version at creation time: 1.41.3
# RSQLite version at creation time: 2.2.6
# DBSCHEMAVERSION: 1.2
补充一些txbd文件的操作
参考网站:http://www.360doc.com/content/19/1202/12/50736008_876886520.shtml
> library(TxDb.Hsapiens.UCSC.hg19.knownGene)
> Txdb = TxDb.Hsapiens.UCSC.hg19.knownGene
> seqinfo(Txdb)
Seqinfo object with 93 sequences (1 circular) from hg19 genome:
seqnames seqlengths isCircular genome
chr1 249250621 <NA> hg19
chr2 243199373 <NA> hg19
chr3 198022430 <NA> hg19
chr4 191154276 <NA> hg19
chr5 180915260 <NA> hg19
... ... ... ...
chrUn_gl000245 36651 <NA> hg19
chrUn_gl000246 38154 <NA> hg19
chrUn_gl000247 36422 <NA> hg19
chrUn_gl000248 39786 <NA> hg19
chrUn_gl000249 38502 <NA> hg19
> columns(Txdb)
[1] "CDSCHROM" "CDSEND" "CDSID"
[4] "CDSNAME" "CDSSTART" "CDSSTRAND"
[7] "EXONCHROM" "EXONEND" "EXONID"
[10] "EXONNAME" "EXONRANK" "EXONSTART"
[13] "EXONSTRAND" "GENEID" "TXCHROM"
[16] "TXEND" "TXID" "TXNAME"
[19] "TXSTART" "TXSTRAND" "TXTYPE"
> seqlevels(Txdb) %>% head(10)
[1] "chr1" "chr2" "chr3" "chr4" "chr5"
[6] "chr6" "chr7" "chr8" "chr9" "chr10"
> transcripts(Txdb) #转录本信息
GRanges object with 82960 ranges and 2 metadata columns:
seqnames ranges strand |
<Rle> <IRanges> <Rle> |
[1] chr1 11874-14409 + |
[2] chr1 11874-14409 + |
[3] chr1 11874-14409 + |
[4] chr1 69091-70008 + |
[5] chr1 321084-321115 + |
... ... ... ... .
[82956] chrUn_gl000237 1-2686 - |
[82957] chrUn_gl000241 20433-36875 - |
[82958] chrUn_gl000243 11501-11530 + |
[82959] chrUn_gl000243 13608-13637 + |
[82960] chrUn_gl000247 5787-5816 - |
tx_id tx_name
<integer> <character>
[1] 1 uc001aaa.3
[2] 2 uc010nxq.1
[3] 3 uc010nxr.1
[4] 4 uc001aal.1
[5] 5 uc001aaq.2
... ... ...
[82956] 82956 uc011mgu.1
[82957] 82957 uc011mgv.2
[82958] 82958 uc011mgw.1
[82959] 82959 uc022brq.1
[82960] 82960 uc022brr.1
-------
seqinfo: 93 sequences (1 circular) from hg19 genome
> promoter = promoters(Txdb, upstream = 1000, downstream = 1000); promoter
Warning message:
In valid.GenomicRanges.seqinfo(x, suggest.trim = TRUE) :
GRanges object contains 1 out-of-bound range
located on sequence chrUn_gl000223. Note
that ranges located on a sequence whose
length is unknown (NA) or on a circular
sequence are not considered out-of-bound
(use seqlengths() and isCircular() to get
the lengths and circularity flags of the
underlying sequences). You can use trim() to
trim these ranges. See
?`trim,GenomicRanges-method` for more
information.
GRanges object with 82960 ranges and 2 metadata columns:
seqnames ranges strand
<Rle> <IRanges> <Rle>
uc001aaa.3 chr1 10874-12873 +
uc010nxq.1 chr1 10874-12873 +
uc010nxr.1 chr1 10874-12873 +
uc001aal.1 chr1 68091-70090 +
uc001aaq.2 chr1 320084-322083 +
... ... ... ...
uc011mgu.1 chrUn_gl000237 1687-3686 -
uc011mgv.2 chrUn_gl000241 35876-37875 -
uc011mgw.1 chrUn_gl000243 10501-12500 +
uc022brq.1 chrUn_gl000243 12608-14607 +
uc022brr.1 chrUn_gl000247 4817-6816 -
| tx_id tx_name
| <integer> <character>
uc001aaa.3 | 1 uc001aaa.3
uc010nxq.1 | 2 uc010nxq.1
uc010nxr.1 | 3 uc010nxr.1
uc001aal.1 | 4 uc001aal.1
uc001aaq.2 | 5 uc001aaq.2
... . ... ...
uc011mgu.1 | 82956 uc011mgu.1
uc011mgv.2 | 82957 uc011mgv.2
uc011mgw.1 | 82958 uc011mgw.1
uc022brq.1 | 82959 uc022brq.1
uc022brr.1 | 82960 uc022brr.1
-------
seqinfo: 93 sequences (1 circular) from hg19 genome
> exons(Txdb)
GRanges object with 289969 ranges and 1 metadata column:
seqnames ranges strand |
<Rle> <IRanges> <Rle> |
[1] chr1 11874-12227 + |
[2] chr1 12595-12721 + |
[3] chr1 12613-12721 + |
[4] chr1 12646-12697 + |
[5] chr1 13221-14409 + |
... ... ... ... .
[289965] chrUn_gl000241 35706-35859 - |
[289966] chrUn_gl000241 36711-36875 - |
[289967] chrUn_gl000243 11501-11530 + |
[289968] chrUn_gl000243 13608-13637 + |
[289969] chrUn_gl000247 5787-5816 - |
exon_id
<integer>
[1] 1
[2] 2
[3] 3
[4] 4
[5] 5
... ...
[289965] 289965
[289966] 289966
[289967] 289967
[289968] 289968
[289969] 289969
-------
seqinfo: 93 sequences (1 circular) from hg19 genome
同理:
cds(Txdb) #获得cds 区域
transcriptBy(Txdb) #基于基因的转录本分Granges list
exonsBy(Txdb) #基于基因的exons分Granges list