#设置环境
getwd()
setwd("当前路径")
rm(list=ls())
options(stringsAsFactors = F)
#导入原始数据,
read.count=read.table("文件所在路径",header=T)
#数据处理
read.matrix1<-read.count[,c(1,7:12)]
rownames(read.matrix)<- read.matrix[,1]
read.matrix$Geneid<-NULL
read.matrix = read.matrix[rowMeans(read.matrix) > 1,]
library(DESeq2) #载入DESeq2的安装包(其他相关联的安装包会自动载入)
##第一次使用时:
if(FALSE){install.packages("BiocManager")
BiocManager::install("DESeq2")
}
read.matrix<-as.matrix(read.matrix)
#设置分组信息以及构建dds对象
condition <- factor(rep(c('treat', 'control'),each = 3))
coldata <- data.frame(row.names=colnames(read.matrix),condition)
dds <-DESeqDataSetFromMatrix(countData = read.matrix,colData = coldata,design =~condition)
#使用DESeq函数进行估计离散度,然后进行标准的差异表达分析,得到res对象结果
dds<-DESeq(dds)
res <- results(dds)
head(dds)
#设定阈值,筛选差异基因,导出数据
table(res$padj <0.05)
res <- res[order(res$padj),]
resdata <- merge(as.data.frame(res), as.data.frame(counts(dds, normalized=TRUE)),by="row.names",sort=FALSE)
write.csv(resdata,file = "RNA-seq.csv")
#查看上调下调基因
summary(res)
#基础分析到这里就结束了,下面是可视化分析
参考:https://blog.csdn.net/qq_42458954/article/details/104078845
#提取差异OTU并进行注释
diff_OTU_deseq2 <-subset(res, padj < 0.05 & abs(log2FoldChange) > 1)
dim(diff_OTU_deseq2)
head(diff_OTU_deseq2)
write.csv(diff_OTU_deseq2,file= "DEG_diff_gene.csv")
##火山图
for(i in 1:nrow(resdata)){
if(abs(resdata[i,"log2FoldChange"])>=0.5) resdata[i,"select_change"]<-"y" else resdata[i,"select_change"]<-"n"
if(resdata[i,'padj'] %in% NA | abs(resdata[i,'padj']) >= 0.05) resdata[i,'select_pvalue'] <- 'n' else resdata[i,'select_pvalue'] <- 'y'
resdata[i,'select'] <- paste(resdata[i,'select_change'], resdata[i,'select_pvalue'], sep = '')
}
#对于每个OTU,若log2FoldChange < 0.5,即该OTU在两组间的丰度差异倍数低于2(默认情况下,将差异倍数2判定为分界点),
#则在“resdata”的“select_change”列标记为“n”,反之为“y”;若padj >= 0.05,即校正后的显著性p值未低于0.05的显著性水平,
#则在“resdata”的“select_pvalue”列标记为“n”,反之为“y”。
#同时合并“select_change”和“select_pvalue”的结果,可得到“nn”(p >= 0.05,FC < 2)、“ny”(p < 0.05,FC < 2)、
#“yn”(p >= 0.05,FC >= 2)、“yy”(p < 0.05,FC >= 2)四种组合。
library(ggplot2)
resdata$select <- factor(resdata$select, levels = c('nn', 'ny', 'yn', 'yy'), labels = c('p >= 0.05, FC < 2', 'p < 0.05, FC < 2', 'p >= 0.05, FC >= 2', 'p < 0.05, FC >= 2'))
volcano_plot_pvalue <- ggplot(resdata, aes(log2FoldChange, -log(padj, 10))) +
geom_point(aes(color = select), alpha = 0.6) +
scale_color_manual(values = c('gray30', 'green4', 'red2', 'blue2')) +
theme(panel.grid = element_blank(), panel.background = element_rect(color = 'black', fill = 'transparent')) +
theme(legend.title = element_blank(), legend.key = element_rect(fill = 'transparent'), legend.background = element_rect(fill = 'transparent')) +
geom_vline(xintercept = c(-0.5, 0.5), color = 'gray', size = 0.5) +
geom_hline(yintercept = -log(0.05, 10), color = 'gray', size = 0.5) +
labs(x = 'log2 Fold Change', y = '-log10 p-value')
#输出绘图结果
ggsave('volcano_plot_pvalue.png', volcano_plot_pvalue, width = 6, height = 5)