# 一、加载包
library(ggThemeAssist)
library(ggplot2)
library(PCAtools)
library(tidyverse)
# 二、读入数据
# 注意:PCA 和聚类树图一般使用原始的表达矩阵数据
# Load the expression matrix: tab-separated, the first line is the header and
# the first column supplies the row names.
# NOTE(review): a raw GEO series-matrix download usually carries "!"-prefixed
# metadata lines; this call presumably expects them to have been stripped
# already (otherwise comment.char = "!" would be needed) -- confirm against
# the actual input file.
de_matrix <- read.table(
  "GSE11200_series_matrix.txt",
  sep = "\t",
  header = TRUE, # spelled out: `T` is an ordinary variable and can be rebound
  row.names = 1
)

# Load the per-sample annotation table; the first column supplies the row
# names. This table is passed as `metadata` to the PCA call further down.
sample_info <- read.csv(
  file = "GSE11200_sampleinfo.csv",
  header = TRUE,
  row.names = 1
)
# > de_matrix[1:4, 1:4]
# GSM1 GSM2 GSM3 GSM4
# 7892501 2.070292 2.124219 2.291860 2.405174
# 7892502 3.823558 2.690961 2.725019 3.900261
# 7892503 3.387269 2.678249 3.054946 2.953741
# 7892504 7.631480 6.559667 7.422657 6.576941
# > head(sample_info)
# group Sample_title sub_group
# GSM1 CON control_1 CON
# GSM2 CON control_2 CON
# GSM3 CON control_3 CON
# GSM4 CON control_4 CON
# GSM5 Treat Treat_1 Treat_POST
# GSM6 Treat Treat_2 Treat_high
# 三、PCA 分析
# Run the PCA on the expression matrix, attaching the sample annotations as
# metadata. The result is deliberately kept under the name `pca` because later
# code reads `pca$rotated`; the package-qualified calls below make it clear
# which functions are meant even though the object shares the function's name.
pca <- PCAtools::pca(de_matrix, metadata = sample_info)

# Print the PCA object, then its `variance` component.
pca
pca$variance

# Scree plot; per the original note it is drawn from the values in
# `pca$variance`.
PCAtools::screeplot(pca)

# Biplot of the first two principal components.
PCAtools::biplot(pca, x = "PC1", y = "PC2")
# 四、将 PCA 数据和分组信息整合到一起
# Combine the `rotated` component of the PCA result with the sample
# annotations. Both tables carry the sample identifier in their row names, so
# each has its row names moved into a `sample_name` column, and the two are
# then left-joined on that column (every row of `pca$rotated` is kept).
pca_rotated_plus <- pca$rotated %>%
  rownames_to_column(var = "sample_name") %>%
  left_join(
    rownames_to_column(sample_info, var = "sample_name"),
    by = "sample_name"
  )

# Quick look at the first rows of the merged table.
head(pca_rotated_plus)
# 五、ggplot 绘图
# 可使用 ggThemeAssist 对图像进行进一步美化
# Scatter plot of PC1 against PC2 from the merged table. Point shape is mapped
# to `group` and point colour to `sub_group`; the point size is fixed at 8.
ggplot(data = pca_rotated_plus, mapping = aes(x = PC1, y = PC2)) +
  geom_point(
    mapping = aes(shape = group, color = sub_group),
    size = 8
  ) +
  theme_test()