直接下载该数据集的GEO series matrix 数据 ----
如果是直接下载表达矩阵的话(大多数已经进行标准化和背景校正)
可以查看一下处理是否得当
library(GEOquery)
GSE7014_sm <- getGEO(filename = "/Users/apple/Desktop/练手T2DM/T2DM/GSE7014/GSE7014_series_matrix.txt.gz",
getGPL = F)
GSE7014_expr_sm <- exprs(GSE7014_sm) #提取表达矩阵
GSE7014_pd <- pData(GSE7014_sm) #提取临床信息
# 查看数据的处理方式
GSE7014_pd$data_processing[1]
# 查看数据的表达丰度,看是否经过log2转化
quantile(exprs(GSE7014_sm)[,1])
View(GSE7014_expr_sm)
# 如果表达丰度的数值在50以内,通常是经过log2转化的。
# 如果数字在几百几千,则是未经转化的。
# > quantile(exprs(GSE7014_sm)[,1])
0% 25% 50% 75% 100%
1.34737 97.01700 176.73200 373.24775 23949.60000
# 可以发现,表达丰度极高,所以下一步进行log2转换
# 转换以后绘箱线图观察
# boxplot
dev.new()
par(mfrow = c(1, 1))
par(mar = c(9, 5, 3, 3))
par(cex = 0.8)
boxplot(GSE7014_expr_sm_log, # 表达矩阵
las = 2,
col = rep(c("blue", "red"), each = 20), # 20个实验组
ylab = "Log intensity")
dev.off()
![截屏2021-06-03 下午3.29.23.png](https://upload-images.jianshu.io/upload_images/25203806-30b9f49f3625f257.png?imageMogr2/auto-orient/strip%7CimageView2/2/w/1240)
# 再绘制PCA查看
T_GSE7014_expr_sm_log <- t(GSE7014_expr_sm_log) # 对表达矩阵进行转置
T_remove_GSE7014_expr_sm_log <- T_GSE7014_expr_sm_log[-c(1:10),] # 删除35-44等10个样本(T1DM型)
#用`prcomp`进行PCA分析
pca.results <- prcomp(T_remove_GSE7014_expr_sm_log , center = TRUE, scale. = FALSE)
# 定义足够多的颜色,用于展示分组
mycol <- c("#223D6C","#D20A13","#088247","#FFD121","#11AA4D","#58CDD9","#7A142C","#5D90BA","#431A3D","#91612D","#6E568C","#E0367A","#D8D155","#64495D","#7CC767")
# PCA方法1:自定义R包yyplot
install.packages("ggimage")
install.packages("devtools")
install.packages("qrcode")
devtools::install_github("GuangchuangYu/yyplot")
devtools::install_github('fawda123/ggord')
library(devtools)
require(ggplot2)
require(ggimage)
require(yyplot)
library(plyr)
library(ggord)
pca_matrix <- pca.results$x
# 用ggord画基本PCA图
ggord(pca.results, grp_in = GSE7014_targets$group, repel=TRUE,
ellipse = FALSE, #不显示置信区间背景色
size = 2, #样本的点大小
alpha=0.5, #设置点为半透明,出现叠加的效果
#如果用自定义的颜色,就运行下面这行
cols = mycol[1:length(unique(GSE7014_targets$group))],
arrow = NULL,txt = NULL) + #不画箭头和箭头上的文字
theme(panel.grid =element_blank()) + #去除网格线
# 用yyplot添加置信区间圆圈
geom_ord_ellipse(ellipse_pro = .95, #设置置信区间
size=1.5, #线的粗细
lty=1 ) #实线
#保存到pdf文件
ggsave("PCA_classic.pdf", width = 6, height = 6)
# PCA方法2:
# 定义颜色两组
red = "#e41e25"
green = "#4cb049"
# 颜色分组
color = factor(Targets$group,
labels = c(red,green),
levels = c("T2DM","Control"))
# pca函数: princomp
pca <- princomp(GSE7014_expr_sm_log)
head(pca)
## 可视化
# draw 3d plot--1" 交互式绘图
BiocManager::install("rgl") ##安装rgl R包
install.packages("httpuv")
library(rgl)
plot3d(pca$loadings[,1:3],col=color,
type="s",radius=0.005,
grid=50L,pch=16)