0.起因
最近在群里看到一个图,棒棒糖图的点用饼图代替了。
然后又看到一个文章里,用饼图代替了富集分析的点图,妙啊~
这个地球上有什么漂亮的图能躲过我的小胖手呢?画他!
1.搜索
试了几个不同的关键词,最后这样搜到了。
答案指向一个包,名叫scatterpie。一看作者,啊,这不是Y叔嘛~
2. 跑示例代码
2.1 示例数据
set.seed(123)
long <- rnorm(50, sd=100)
lat <- rnorm(50, sd=50)
d <- data.frame(long=long, lat=lat)
d <- with(d, d[abs(long) < 150 & abs(lat) < 70,])
n <- nrow(d)
d$region <- factor(1:n)
d$A <- abs(rnorm(n, sd=1))
d$B <- abs(rnorm(n, sd=2))
d$C <- abs(rnorm(n, sd=3))
d$D <- abs(rnorm(n, sd=4))
d[1, 4:7] <- d[1, 4:7] * 3
head(d)
## long lat region A B C D
## 1 -56.047565 12.665926 1 2.13121969 8.663359 3.928711 8.676792
## 2 -23.017749 -1.427338 2 0.25688371 1.403569 1.375096 4.945092
## 4 7.050839 68.430114 3 0.24669188 0.524395 3.189978 5.138863
## 5 12.928774 -11.288549 4 0.34754260 3.144288 3.789556 2.295894
## 8 -126.506123 29.230687 5 0.95161857 3.029335 1.048951 2.471943
## 9 -68.685285 6.192712 6 0.04502772 3.203072 2.596539 4.439393
2.2 示例图片
library(ggplot2)
library(scatterpie)
ggplot() +
geom_scatterpie(aes(x=long,
y=lat),
data=d,
cols=LETTERS[1:4]) +
coord_equal()
3.仿造示例数据并画图
3.1找个数据做差异分析
library(tinyarray)
library(dplyr)
library(org.Hs.eg.db)
gse = "GSE42872"
geo = geo_download(gse)
Group = rep(c("control","treat"),each = 3)
Group = factor(Group)
find_anno(geo$gpl)
## [1] "`library(hugene10sttranscriptcluster.db);ids <- toTable(hugene10sttranscriptclusterSYMBOL)` and `ids <- AnnoProbe::idmap('GPL6244')` are both avaliable"
ids <- AnnoProbe::idmap(geo$gpl,destdir = tempdir())
deg = get_deg(geo$exp,Group,ids,logFC_cutoff = 2)
head(deg)
## logFC AveExpr t P.Value adj.P.Val B probe_id
## 1 5.780170 7.370282 82.94833 3.495205e-12 1.163798e-07 16.32898 8133876
## 2 -4.212683 9.106625 -68.40113 1.437468e-11 2.393169e-07 15.71739 7965335
## 3 5.633027 8.763220 57.61985 5.053466e-11 4.431880e-07 15.04752 7972259
## 4 -3.801663 9.726468 -57.21112 5.324059e-11 4.431880e-07 15.01709 7972217
## 5 3.263063 10.171635 50.51733 1.324638e-10 8.821294e-07 14.45166 8129573
## 6 -3.843247 9.667077 -45.87910 2.681063e-10 1.487856e-06 13.97123 8015806
## symbol change ENTREZID
## 1 CD36 up 948
## 2 DUSP6 down 1848
## 3 DCT up 1638
## 4 SPRY2 down 10253
## 5 MOXD1 up 26002
## 6 ETV4 down 2118
table(deg$change)
##
## down stable up
## 48 18092 82
3.2 用差异基因做富集分析
gene_up = deg$symbol[deg$change=="up"]
gene_down = deg$symbol[deg$change=="down"]
gene_diff = c(gene_up,gene_down)
length(gene_diff)
## [1] 130
head(gene_diff)
## [1] "CD36" "DCT" "MOXD1" "NUPR1" "ST6GALNAC2"
## [6] "TXNIP"
g = quick_enrich(gene_diff,kkgo_file = "kk_gop.Rdata")
dat = g$go@result %>%
arrange(pvalue) %>%
filter(ONTOLOGY=="BP") %>%
head(10) %>%
mutate(Description = factor(Description,levels = Description))
p1 = ggplot(dat)+
geom_point(aes(x = -log10(pvalue),y = Description))+
theme_bw()
p1
3.3 计算每条term的上下调基因数量
library(stringr)
p = str_split(dat$geneID,"/")
names(p) = dat$ID
get_num = function(x){
c(sum(x %in% gene_up),sum(x %in% gene_down))
}
re = sapply(p, get_num) %>% t() %>% as.data.frame()
rownames(re) = names(p)
colnames(re) = c("up","down")
3.4 仿制示例数据并画图
试了一下,如果纵坐标是分类变量是会报错的。。。
所以曲线救国,先生成一组连续型数据,然后改刻度值。
而需要保持饼图的形状是圆的,又需要固定坐标系,横纵坐标刻度是一样的。
这时候难度就来了,需要生成的纵坐标得是和横坐标范围一致的等差数列,用seq函数。
dp = cbind(lp = -log10(dat$pvalue),
Description = dat$Description,
re)
dp$lab = seq(min(dp$lp), max(dp$lp), length.out = nrow(dp))
head(dp)
## lp Description up
## GO:0071900 7.264160 regulation of protein serine/threonine kinase activity 5
## GO:0050673 6.929666 epithelial cell proliferation 8
## GO:0043405 6.351294 regulation of MAP kinase activity 5
## GO:0009314 5.748852 response to radiation 6
## GO:0010165 5.691160 response to X-ray 0
## GO:0071214 5.635287 cellular response to abiotic stimulus 6
## down lab
## GO:0071900 10 5.525307
## GO:0050673 8 5.718513
## GO:0043405 5 5.911719
## GO:0009314 8 6.104925
## GO:0010165 5 6.298131
## GO:0071214 6 6.491337
p2 = ggplot() +
geom_scatterpie(aes(x=lp,
y=lab),
data=dp,
cols=colnames(re),
pie_scale = 1.5) +
theme_bw()+
scale_fill_manual(values = c("#f87669", "#2874C5"))+
scale_y_continuous(labels = dp$Description,breaks = dp$lab)+
labs(x = "-log10(pvalue)",y = "Description")+
coord_equal()
p2
3.5 要棒棒糖还不简单嘛
p2 + geom_segment(data = dp, aes(x=as.integer(min(dp$lp)), xend=lp, y=lab, yend=lab))