使用R4.2.3
这里使用DESeq2进行差异表达分析,先加载R包
library(DESeq2)
导入数据
mycounts <- read.csv("1.csv",header = TRUE,row.names = 1)
检查前几列数据,确定数据导入是否准确,并检查共导入多少基因
head(mycounts)
dim(mycounts)
导入分组数据
mymeta<-read.csv("mymeta.csv",stringsAsFactors = T)
定义变量mycountsGroups,并赋值
mycountsGroups <- mymeta$Group
构建用于适用于DESeq2的数据集
colData <- data.frame(row.names=colnames(mycounts),
group_list=mycountsGroups)
dds <- DESeqDataSetFromMatrix(countData = mycounts,
colData = colData,
design = ~group_list,
tidy = F)
过滤低表达基因
keep <- rowSums(counts(dds)>10) >4 #在超过4个library中counts大于10
dds.keep <- dds[keep, ]
dim(dds.keep)
差异分析
degobj <- DESeq(dds.keep)
countdds <- counts(degobj, normalized = T) #提取表达矩阵
deg.res1 <- results(degobj, contrast=c("group_list","A group","B group"), lfcThreshold = 0,
pAdjustMethod = 'fdr') #实验组在前,对照组在后
dim(deg.res1)
padj有时出现NA
apply(deg.res1,2, function(x) sum(is.na(x)))
deg.res1$padj[is.na(deg.res1$padj)] <- 1
table(is.na(deg.res1$padj))
summary(deg.res1) #随后可直接用dplyr分类UP、DOWN
使用dplyr将DEGs分为上调DEGs和下调DEGs并导出
library(dplyr)
res_1<-data.frame(deg.res1)
导出log2(FoldChange)>0.5的DEGs,这里倍数可以调整,选择适合自己实验的参数
res_1 %>%
mutate(group = case_when(log2FoldChange >0.5 & padj < 0.05 ~ "UP",log2FoldChange <-0.5 & padj < 0.05 ~ "DOWN",TRUE ~ "Unchanged")) -> res_0.5
table(res_0.5$group)
write.csv(res_0.5,"res_0.5.csv",row.names = TRUE,col.names = TRUE)##添加label,用于火山图
导出只包含gene symbol的DEGs文件
upsig0.5 <- res_0.5[grep(pattern = "UP",res_0.5[,7]),]
downsig0.5 <- res_0.5[grep(pattern = "DOWN",res_0.5[,7]),]
deg0.5 <- rbind(upsig0.5,downsig0.5)
write.csv(upsig0.5,file="FC0.5upsig10,4.csv",quote = F)#文件命名不宜太长,容易报错
write.csv(downsig0.5,file="FC0.5downsig10,4.csv",quote = F)
write.csv(deg0.5,file="FC0.5deg10,4.csv",quote = F)
火山图
先在excel中添加一列label,将想要在图中标记的基因symbol添加到该列相应行,如下图

res_0.5 <- read.csv("res_0.5.csv",header = TRUE,row.names = 1)
library(ggpubr)
library(ggthemes)
##对差异基因校正后p值(adj.P.Val)进行log10转换
res_0.5$logadj.p.val <- -log10(res_0.5$padj)
colnames(res_0.5)[7] <- "Group"
使用ggrepel包,防止基因名重叠,size调整点大小
library(ggrepel)
options(ggrepel.max.overlaps = Inf)
ggscatter(res_0.5,x="log2FoldChange",y='logadj.p.val',
color = 'Group',
palette = c('#356fae','#000000','#b69e72'),
repel = F,
size = 1,
xlab = 'Log2FoldChange',
ylab = '-Log10(Adjust P-value)')+theme_base()+
theme(legend.position=c(.89,.90),
legend.background = element_rect(fill="white", size=0.1, linetype="solid",colour ="darkblue"))+
geom_hline(yintercept = 1.3,linetype='dashed')+
geom_text_repel(aes(label=label),size=3.5,)+
geom_vline(xintercept = c(-0.5,0.5),linetype='dashed')
GO和KEGG分析
library(clusterProfiler)
library(org.Hs.eg.db)
library(stringr)
library(cowplot)
library(ggplot2)
上调差异基因进行GO和KEGG分析
upsig <- bitr(row.names(upsig0.5), fromType = "SYMBOL",
toType = c( "ENTREZID",'ENSEMBL'),
OrgDb = org.Hs.eg.db)
upgenelist0.5 <- upsig$ENTREZID
data(geneList, package="DOSE")
##分别富集BP,CC,MF
upgo0.5BP <- enrichGO(upgenelist0.5, OrgDb = org.Hs.eg.db, ont='BP',pAdjustMethod = 'fdr',
pvalueCutoff = 0.01, qvalueCutoff = 0.05,keyType = 'ENTREZID',readable= FALSE)
BPsymbol <- setReadable(upgo0.5BP, OrgDb = org.Hs.eg.db, keyType="ENTREZID") #将geneID 转换为gene symbol
write.csv(BPsymbol@result,"upGo_BP.csv",row.names =FALSE)
upgo0.5CC <- enrichGO(upgenelist0.5, OrgDb = org.Hs.eg.db, ont='CC',pAdjustMethod = 'fdr',
pvalueCutoff = 0.01, qvalueCutoff = 0.05,keyType = 'ENTREZID',readable= FALSE)
CCsymbol <- setReadable(upgo0.5CC, OrgDb = org.Hs.eg.db, keyType="ENTREZID")
write.csv(CCsymbol@result,"upGo_CC.csv",row.names =FALSE)
upgo0.5MF <- enrichGO(upgenelist0.5, OrgDb = org.Hs.eg.db, ont='MF',pAdjustMethod = 'fdr',
pvalueCutoff = 0.01, qvalueCutoff = 0.05,keyType = 'ENTREZID',readable= FALSE)
MFsymbol <- setReadable(upgo0.5MF, OrgDb = org.Hs.eg.db, keyType="ENTREZID")
write.csv(MFsymbol@result,"upGo_MF.csv",row.names =FALSE)
KEGG
library(R.utils)
R.utils::setOption("clusterProfiler.download.method",'auto')
upkegg0.5 <- enrichKEGG(upgenelist0.5, organism = 'hsa', keyType = 'kegg', pvalueCutoff = 0.05,
pAdjustMethod = 'BH', minGSSize = 10,maxGSSize = 500,
qvalueCutoff = 0.05,use_internal_data = FALSE)
KEGGsymbol <- setReadable(upkegg0.5, OrgDb = org.Hs.eg.db, keyType="ENTREZID")
write.csv(KEGGsymbol@result,"upKEGG.csv",row.names =FALSE)
下调基因富集分析同上调。