PCA和差异基因图是生信技能树[生信爆款入门课程]GEO数据挖掘的重点。为拓展课堂所学知识,现在找一个数据集对他们做下练习总结。
1.主成分PCA 图----
> dat=as.data.frame(t(exp))
> library(FactoMineR)
> library(factoextra)
> dat.pca <- PCA(dat, graph = FALSE)
> pca_plot <- fviz_pca_ind(dat.pca,
+ geom.ind = "point", # show points only (nbut not "text")
+ col.ind = Group, # color by groups
+ palette = c("#00AFBB", "#E7B800"),
+ addEllipses = TRUE, # Concentration ellipses
+ legend.title = "Groups"
+ )
> pca_plot
> ggsave(plot = pca_plot,filename = paste0(gse_number,"_PCA.png"))
Saving 6.4 x 3.77 in image
> save(pca_plot,file = "pca_plot.Rdata")
2.top 1000 sd 热图----
> cg=names(tail(sort(apply(exp,1,sd)),1000))
> n=exp[cg,]
> library(pheatmap)
Warning message:
程辑包‘pheatmap’是用R版本4.0.3 来建造的
> annotation_col=data.frame(group=Group)
> rownames(annotation_col)=colnames(n)
> ## 2.自行标准化再画热图
> n2 = t(scale(t(n)))
> pheatmap(n2,
+ show_colnames =F,
+ show_rownames = F,
+ cluster_cols = F,
+ annotation_col=annotation_col,
+ breaks = seq(-3,3,length.out = 100)
+ )
3.相关性热图
> pheatmap::pheatmap(cor(exp),
+ annotation_col = annotation_col)
>
- 差异基因火山图
1.火山图----
> library(dplyr)
> library(ggplot2)
> dat = deg
> p <- ggplot(data = dat,
+ aes(x = logFC,
+ y = -log10(P.Value))) +
+ geom_point(alpha=0.4, size=3.5,
+ aes(color=change)) +
+ ylab("-log10(Pvalue)")+
+ scale_color_manual(values=c("blue", "grey","red"))+
+ geom_vline(xintercept=c(-logFC_t,logFC_t),lty=4,col="black",lwd=0.8) +
+ geom_hline(yintercept = -log10(P.Value_t),lty=4,col="black",lwd=0.8) +
+ theme_bw()
> p
> load(file = 'step2output.Rdata')
> if(T){
+ #全部差异基因
+ cg = deg$probe_id[deg$change !="stable"]
+ length(cg)
+ }else{
+ #取前30上调和前30下调
+ x=deg$logFC[deg$change !="stable"]
+ names(x)=deg$probe_id[deg$change !="stable"]
+ cg=names(c(head(sort(x),30),tail(sort(x),30)))
+ length(cg)
+ }
[1] 1616
> n=exp[cg,]
> dim(n)
[1] 1616 22
差异基因热图
> library(pheatmap)
> annotation_col=data.frame(group=Group)
> rownames(annotation_col)=colnames(n)
> heatmap_plot <- pheatmap(n,show_colnames =F,
+ show_rownames = F,
+ scale = "row",
+ cluster_cols = F,
+ annotation_col=annotation_col,
+ breaks = seq(-3,3,length.out = 100)
+ )
> heatmap_plot
> ggsave(heatmap_plot,filename = paste0(gse_number,"_heatmap.png"))
Saving 6.4 x 3.77 in image
> load("pca_plot.Rdata")
3.拼图
> library(patchwork)
Warning message:
程辑包‘patchwork’是用R版本4.0.3 来建造的
> library(ggplotify)
Warning message:
程辑包‘ggplotify’是用R版本4.0.3 来建造的
> (pca_plot + p +as.ggplot(heatmap_plot))
>