α多样性分析需要没有经过归一化的数据
1.安装
BiocManager::install("phyloseq")
BiocManager::install("MicrobiotaProcess")
2.导入文件
rm(list = ls())
library("phyloseq")
library("MicrobiotaProcess")
library("ggplot2")
library("ggpubr")#用于事后检验标记
otu <- "./table-dada2_pe.qza"
rep <- "rep-seqs-dada2_pe.qza"
tree <- "rooted-tree_pe.qza"
tax <- "taxonomy_pe.qza"
sample <- "metadata_pe.txt"
qiimedata <- import_qiime2(otuqza=otu, taxaqza=tax,refseqqza=rep,
mapfilename=sample,treeqza=tree)
qiimedata
#phyloseq-class experiment-level object
#otu_table() OTU Table: [ 1320 taxa and 19 samples ]
#sample_data() Sample Data: [ 19 samples by 2 sample variables ]
#tax_table() Taxonomy Table: [ 1320 taxa by 8 taxonomic ranks ]
#phy_tree() Phylogenetic Tree: [ 1320 tips and 1319 internal nodes ]
#refseq() DNAStringSet: [ 1320 reference sequences ]
查看样品个数
nsamples(qiimedata)
#[1] 25
查看otu个数
ntaxa(qiimedata)
#[1] 1919
3.筛选otu
方法1:保留在所有样本中总序列数大于100的otu
提取总count数量大于100的样品
sub_qiimedata <- prune_taxa(taxa_sums(qiimedata) > =100, qiimedata)
taxa_sums函数可以将qiimedata每一条otu在所有样品中序列数求和
taxa_sums(qiimedata)
#4d335c926abe085bbfc77449f788a5a7 2ec6e10f3d193804e55967e2d7b3d9b0
# 126 221
#ce92babc3ea4623a1678033cba9b3e84 fd513425dcdc4d8c7e35faf166162e4a
# 775 172
#5ae65fbc4c217f8558906c695bb15d99 5936e83d1a8073359fcb4a660ef63083
查看剩余otu个数
ntaxa(sub_qiimedata)
#[1] 399
方法2:去除至少20%样本中未见过3次以上的OTU
sub_qiimedata = filter_taxa(qiimedata,
function(x) sum(x > 3) > (0.2*length(x)), TRUE)
ntaxa(sub_qiimedata)
#[1] 213
方法3:去除序列数少于10000的样本
sub_qiimedata = prune_samples(sample_sums(qiimedata)>=10000, qiimedata)
nsamples(sub_qiimedata)
#[1] 20
4. α多样性分析
4.1 phyloseq包的plot_richness函数
p <- plot_richness(qiimedata, "group",
measures=NULL,
color = "group")+
geom_boxplot(aes(fill=group))+
theme_bw()+xlab(NULL)+
scale_color_aaas()+
scale_fill_aaas(alpha=0.7)
library("ggpubr")#用于事后检验标记
mycompare=list(c("Pre","Post"))
p<-p+stat_compare_means(comparisons=mycompare,
label = "p.signif",
method = 'wilcox')
p
4.2 MicrobiotaProcess包的get_alphaindex函数
alphaobj <- get_alphaindex(qiimedata)
p_alpha <- ggbox(alphaobj, geom="boxplot",
factorNames="group",
p_textsize =3,
signifmap=FALSE)+
theme_bw()+
geom_point(aes(color =group))+
scale_color_aaas()+
scale_fill_aaas()
p_alpha
稀释曲线
按样品
p_rare <- ggrarecurve(obj=qiimedata,
indexNames=c("Observe","Chao1","ACE"),
chunks=30) +
theme(legend.spacing.y=unit(0.02,"cm"),
legend.text=element_text(size=4))+
theme_bw()+
p_rare
分组
p2 <- ggplot(p_rare$data,aes(readsNums,value,color = group))+
geom_point(stat = "summary", fun = mean)+
geom_smooth()+
facet_wrap(~Alpha,scale = "free")+
theme_bw()+
xlab("Number of sequence")+ylab("alpha metric")+
scale_color_aaas()+
scale_fill_aaas()