参考这篇:拿到基因表达矩阵之后的那点事(三)
DESeq2详细用法 💥这篇要好好看!
RNA-seq(6):数据可视化——学习笔记
古歌看
这步之前已经拿到res
!
1. P值分布图:观察原始p值的分布是重要的,期待去看到在较低的p值处有个峰且P值高于0.1时处于均匀分布
library(ggplot2)
ggplot(res, aes(x = pvalue)) +
geom_histogram(bins = 100)
注意这个时候的res
要转为dataframe
状态
2. RLE plot
library(EDASeq)
par(mfrow = c(1, 2))
plotRLE(counts(dds), outline=FALSE, ylim=c(-4, 4),
col=as.numeric(colData$group),
main = 'Raw Counts')
plotRLE(DESeq2::counts(dds, normalized = TRUE),
outline=FALSE, ylim=c(-4, 4),
col = as.numeric(colData$group),
main = 'Normalized Counts')
3. 差异基因画热图
#rlog 标准化
rld <- rlog(dds)
#读取
exprMatrix_rlog <- assay(rld)
diff_gene <- subset(res, padj < 0.05 & (log2FoldChange > 1 | log2FoldChange < -1))
#💥这里因为我的resGeneId列被作为行名,所以这么写
diff_gene$GeneID <- rownames(diff_gene)
diff_data <- as.matrix(exprMatrix_rlog[diff_gene$GeneID, ])
# colorRampPalette(c("navy", "white", "firebrick3"))(50)
hmcol <- rev(colorRampPalette(RColorBrewer::brewer.pal(9, "YlOrRd"))(255))
ann_colors=list(condition=c(C="#377EB8",Tg="#E41A1C")) #设置颜色
#这行运行不出来 跳过也可
colData <- colData%>%tibble::column_to_rownames('Sample')
pheatmap::pheatmap(diff_data, scale="row", color = hmcol,show_rownames = F,fontsize_row=2,annotation_col = colData, annotation_colors=ann_colors,cutree_cols = 2)