03 PCA和热图
rm(list = ls())
load(file = "step1output.Rdata")
load(file = "step2output.Rdata")
#输入数据:exp和group_list
#Principal Component Analysis
#http://www.sthda.com/english/articles/31-principal-component-methods-in-r-practical-guide/112-pca-principal-component-analysis-essentials
dat=as.data.frame(t(exp))
library(FactoMineR)#画主成分分析图需要加载这两个包
library(factoextra)
# pca的统一操作走起
dat.pca <- PCA(dat, graph = FALSE)
pca_plot <- fviz_pca_ind(dat.pca,
geom.ind = "point", # show points only (nbut not "text")
col.ind = group_list, # color by groups
#palette = c("#00AFBB", "#E7B800"),
addEllipses = TRUE, # Concentration ellipses
legend.title = "Groups"
)
pca_plot
ggsave(plot = pca_plot,filename = paste0(gse,"PCA.png"))
save(pca_plot,file = "pca_plot.Rdata")
#热图
cg=names(tail(sort(apply(exp,1,sd)),1000))
n=exp[cg,]
#绘制热图
annotation_col=data.frame(group=group_list)
rownames(annotation_col)=colnames(n)
library(pheatmap)
pheatmap(n,
show_colnames =F,
show_rownames = F,
annotation_col=annotation_col,
scale = "row")
dev.off()
04差异分析、增加探针名列,探针ID转换
rm(list = ls())
load(file = "step2output.Rdata")
#差异分析,用limma包来做
#需要表达矩阵和group_list,不需要改
library(limma)
design=model.matrix(~group_list) #根据grouplist生成模型矩阵design
fit=lmFit(exp,design) #从exp得到fit
fit=eBayes(fit)#贝叶斯拟合
deg=topTable(fit,coef=2,number = Inf)#fit里面提取结果得到deg,六列数据logFC和pvalue
#为deg数据框添加几列 探针的行名对应到基因ids
#1.加probe_id列,把行名变成一列#法一:$列名+赋值。法二如下
library(dplyr)
deg <- mutate(deg,probe_id=rownames(deg))
head(deg)
#2.加symbol列,火山图要用
deg <- inner_join(deg,ids,by="probe_id")
head(deg)
#按照symbol列去重复. 1随机 2最大值 3 pingjun
deg <- deg[!duplicated(deg$symbol),]
#3.加change列,标记上下调基因
logFC_t=1
P.Value_t = 0.01
k1 = (deg$P.Value < P.Value_t)&(deg$logFC < -logFC_t)
k2 = (deg$P.Value < P.Value_t)&(deg$logFC > logFC_t)
#sum(k1)下调基因的个数
#ifelse的使用:满足K1则为down,不满足,嵌套下一个ifelse
change = ifelse(k1,"down",ifelse(k2,"up","stable"))
table(change)
deg <- mutate(deg,change)
head(deg)
#4.加ENTREZID列,用于富集分析(symbol转entrezid,然后inner_join)
library(ggplot2)
library(clusterProfiler)
library(org.Hs.eg.db)
s2e <- bitr(deg$symbol,
fromType = "SYMBOL",
toType = "ENTREZID",
OrgDb = org.Hs.eg.db)#人类
#org.Hs.eg.db人类对应ID转换,是用来提供其他物种http://bioconductor.org/packages/release/BiocViews.html#___OrgDb
deg <- inner_join(deg,s2e,by=c("symbol"="SYMBOL"))
save(group_list,deg,logFC_t,P.Value_t,file = "step4output.Rdata")
05 火山图和热图
rm(list = ls())
load(file = "step1output.Rdata")
load(file = "step4output.Rdata")
#1.火山图----
library(dplyr)
library(ggplot2)
dat = deg
p <- ggplot(data = dat,
aes(x = logFC,
y = -log10(P.Value))) +
geom_point(alpha=0.4, size=3.5,
aes(color=change)) +
ylab("-log10(Pvalue)")+
scale_color_manual(values=c("blue", "grey","red"))+
geom_vline(xintercept=c(-logFC_t,logFC_t),lty=4,col="black",lwd=0.8) +
geom_hline(yintercept = -log10(P.Value_t),lty=4,col="black",lwd=0.8) +
theme_bw()
p
if(T){
#自选基因
for_label <- dat%>%
filter(symbol %in% c("TRPM3","SFRP1"))
}
if(F){
#p值最小的10个
for_label <- dat %>% head(10)
}
if(F) {
#p值最小的前3下调和前3上调
x1 = dat %>%
filter(change == "up") %>%
head(3)
x2 = dat %>%
filter(change == "down") %>%
head(3)
for_label = rbind(x1,x2)
}
volcano_plot <- p +
geom_point(size = 3, shape = 1, data = for_label) +
ggrepel::geom_label_repel(
aes(label = symbol),
data = for_label,
color="black"
)
volcano_plot
ggsave(plot = volcano_plot,filename = paste0(gse,"volcano.png"))
#2.差异基因热图----
load(file = 'step2output.Rdata')
if(F){
#全部差异基因
cg = deg$probe_id[deg$change !="stable"]
length(cg)
}else{
#取前30上调和前30下调
x=deg$logFC[deg$change !="stable"]
names(x)=deg$probe_id[deg$change !="stable"]
cg=c(names(head(sort(x),30)),names(tail(sort(x),30)))
length(cg)
}
n=exp[cg,]
dim(n)
#作热图
library(pheatmap)
annotation_col=data.frame(group=group_list)
rownames(annotation_col)=colnames(n)
library(ggplotify)
heatmap_plot <- as.ggplot(pheatmap(n,show_colnames =F,
show_rownames = F,
scale = "row",
#cluster_cols = F,
annotation_col=annotation_col))
heatmap_plot
ggsave(heatmap_plot,filename = paste0(gse,"heatmap.png"))
load("pca_plot.Rdata")
library(patchwork)
(pca_plot + volcano_plot +heatmap_plot)+ plot_annotation(tag_levels = "A")