安装、加载R包
rm(list=ls())#clear Global Environment
setwd('D:\\桌面\\差异OTU筛选')#设置工作路径
#安装所需R包
# install.packages("BiocManager")
# library(BiocManager)
# BiocManager::install("DESeq2")
# install.packages("ggplot2")
# install.packages('ggrepel')
#加载包
library(DESeq2)
library(ggplot2)
library(ggrepel)
加载数据
#OTU数据
otu <- read.table(file="otu.txt",sep="\t",header=T,
check.names=FALSE ,row.names=1)
#分组数据
group <- read.table(file="group.txt",sep="\t",
header=T,check.names=FALSE,row.names=1 )
group$group<-factor(group$group,levels = c('con','tes'))
OTU差异分析
1、构建DESeqDataSet对象
df_dds <- DESeqDataSetFromMatrix(countData = otu, colData = group, design = ~group)
2、差异分析
df_dds_res <- DESeq(df_dds)
suppressMessages(df_dds_res)
3、提取分析结果
#提取分析结果
df_res <- results(df_dds_res)
# 根据p-value进行重新排序
df_res = df_res[order(df_res$pvalue),]
df_res #查看结果
4、统计结果
summary(df_res)
5、合并数据
df <- merge(as.data.frame(df_res),
as.data.frame(counts(df_dds_res,normalize=TRUE)),
by="row.names",sort=FALSE)
设置阈值并对数据进行分类
#去除空值
df1<-df[!is.na(df$padj),]
#数据分类——根据其中log2FoldChange、padj指标对OTU进行分类
df1$SG<-as.factor(ifelse(df1$padj<0.05&abs(df1$log2FoldChange)>=2,"Y","N"))
#需要注释的差异OTU标签——差异倍数大于6的进行注释,大家可自行根据需要设置
df1$label<-ifelse(df1$padj<0.05&abs(df1$log2FoldChange)>=6,"Y","N")
df1$label<-ifelse(df1$label == 'Y', as.character(df1$Row.names), '')
#图例标签
df1$SG <- factor(df1$SG, levels = c('Y', 'N'), labels = c('differences','No difference'))
使用火山图进行展示
p <- ggplot(df1, aes(log2FoldChange, -log10(padj))) +
geom_point(aes(color = SG),alpha=0.6, size=2)+
theme_bw()+
theme(legend.title = element_blank(),
panel.grid = element_blank()) +
scale_x_continuous(breaks=seq(-10,10, 2))+
geom_vline(xintercept = c(-2, 2), lty=3,color = 'black', lwd=0.5) +
geom_hline(yintercept = -log10(0.05), lty=3,color = 'black', lwd=0.5) +
scale_color_manual(values = c( 'red','grey'))+
labs(title="volcanoplot",
x = 'log2 fold change',
y = '-log10 pvalue')+
geom_text_repel(aes(x = log2FoldChange,
y = -log10(padj),
label=label),
max.overlaps = 1000,
size=3,
box.padding=unit(0.8,'lines'),
point.padding=unit(0.8, 'lines'),
segment.color='black',
show.legend=FALSE)
p