#读取注释文件
> read.table("PA_NC_specific.anno (distribution).txt", header = F, sep = "\t", quote = "")
V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14
1 chr1 205749988 205749989 chr1:205749771-205750206 chr1 205712819 205750276 ENST00000367142.4 0 - NUCKS1 coding 5UTR 95
2 chr22 36227753 36227754 chr22:36227583-36227924 chr22 36226203 36239643 ENST00000358502.9 0 - APOL2 coding CDS 66
3 chr1 212608907 212608908 chr1:212608769-212609046 chr1 212608628 212620772 ENST00000613954.4 0 + ATF3 coding 5UTR 63
4 chr3 184261175 184261176 chr3:184260989-184261362 chr3 184259213 184261463 ENST00000296238.3 0 - CAMK2N2 coding CDS 47
5 chr10 86499737 86499738 chr10:86499570-86499904 chr10 86435257 86521768 ENST00000618527.4 0 - WAPL coding CDS 43
6 chr8 143919203 143919204 chr8:143918854-143919552 chr8 143915153 143950876 ENST00000322810.8 0 - PLEC coding CDS 79
7 chr12 14803321 14803322 chr12:14803170-14803473 chr12 14784579 14803540 ENST00000261167.6 0 - WBP11 coding intron 2
8 chr2 105334705 105334706 chr2:105334480-105334931 chr2 105334027 105337475 ENST00000610036.1 0 - RP11-332H14.2 noncoding exon 81
9 chr1 144423827 144423828 chr1:144423706-144423949 chr1 144421386 144456809 ENST00000577412.5 0 - NBPF15 coding intron 8
10 chr16 11691563 11691564 chr16:11691237-11691889 chr16 11679080 11742878 ENST00000283033.9 0 - TXNDC11 coding CDS 57
V15 V16 V17
1 ENSG00000069275.12 protein_coding mRNA
2 ENSG00000128335.13 protein_coding mRNA
3 ENSG00000162772.16 protein_coding mRNA
4 ENSG00000163888.3 protein_coding mRNA
5 ENSG00000062650.17 protein_coding mRNA
6 ENSG00000178209.14 protein_coding mRNA
7 ENSG00000084463.7 protein_coding mRNA
8 ENSG00000272994.1 lincRNA Long non-coding RNA
9 ENSG00000266338.6 protein_coding mRNA
10 ENSG00000153066.12 protein_coding mRNA
需要的是V17,所以要提取第17列出来计算每个位置出现的频率,然后用于画饼图
流程:
data <- read.table("PA_NC_specific.anno (distribution).txt", header = F, sep = "\t", quote = "")
#提取第17行,统计频率
data1 <- table(data[,17])
>data1
Long non-coding RNA mRNA Others Pseudogene
1074 7941 108 103
#把data1转换成数据帧
data2 <- as.data.frame(data1)
data2
Var1 Freq
1 Long non-coding RNA 1074
2 mRNA 7941
3 Others 108
4 Pseudogene 103
#方便画图,把第一列的名字改成type
names(data2)[1] <- "type"
data2
type Freq
1 Long non-coding RNA 1074
2 mRNA 7941
3 Others 108
4 Pseudogene 103
#饼图函数
pie_plot <- function(dt){
#colnames must be Freq and type
#####Freq is Freq or percent , type is type or label
library(ggplot2)
library(ggsci)
#dt = dt[order(dt$Freq, decreasing = TRUE),] ## order the Freq
myLabel = paste(dt$type, " ( ", round(dt$Freq/sum(dt$Freq)*100,2), "% ) ", sep = "")
p = ggplot(dt, aes(x = "", y = Freq, fill = type)) +
geom_bar(stat = "identity", width = 1) + scale_fill_jama(name="",label=myLabel)+
coord_polar(theta = "y") + theme_void()+
labs(x = "", y = "", title = "") +
theme(axis.ticks = element_blank()) +
theme(legend.title = element_blank())#, legend.position = "bottom")
return(p)
}
#生成饼图
pie_plot(dt=data2)