# Tally how often each gene_biotype value occurs in the attribute column
# (field 9) of the GTF: one attribute per line, keep the gene_biotype ones,
# strip the key, then count identical values.
cut -f 9 Homo_sapiens.GRCh38.94.chr_patch_hapl_scaff.sorted.gtf |
  tr ';' '\n' |
  sed -n 's/gene_biotype//p' |
  sort |
  uniq -c
# ref: 在R语言中读取GTF文件的最好方法 | 果子学生信  ("The best way to read GTF files in R", Guozi Xue Shengxin blog)
# Export a per-feature gene annotation table from an Ensembl GTF file.
#
# Steps: make sure the required Bioconductor packages are installed, import
# the GTF with rtracklayer, flatten it to a data frame, keep the coordinate
# and gene-identity columns, and write them to a tab-separated text file.

# Install dependencies only when they are missing. The legacy biocLite.R
# installer stops with an error on R >= 3.5; BiocManager is its replacement.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
for (pkg in c("rtracklayer", "SummarizedExperiment")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    BiocManager::install(pkg)
  }
}

# Import the annotation and convert it to a plain data frame
# (one row per GTF record).
gtf1 <- rtracklayer::import("Homo_sapiens.GRCh38.90.chr.gtf")
gtf_df <- as.data.frame(gtf1)

# Preview the first five rows; head() does not pad with NA rows when the
# table is shorter than five records.
test <- head(gtf_df, 5L)
if (interactive()) {
  # View() needs a GUI session, so skip it when run via Rscript.
  View(test)
}

# Keep only the genomic coordinates and the gene identity/biotype columns.
geneid_df <- dplyr::select(
  gtf_df,
  seqnames, start, end, strand, type, gene_name, gene_id, gene_biotype
)

# Tab-separated output without quoting, header line, or row names; the column
# order is the one given in the select() call above.
write.table(
  geneid_df,
  "geneid_df.txt",
  sep = "\t",
  quote = FALSE,
  col.names = FALSE,
  row.names = FALSE
)